diff --git a/README.md b/README.md index f69508a..dd1386c 100644 --- a/README.md +++ b/README.md @@ -28,7 +28,7 @@ | model | android | iOS | from | |-------------------|:--------:|:--------:|:--------:| | YOLOv5s | yes | yes | [Github](https://github.com/ultralytics/yolov5) | -| NanoDet | yes | todo | [Github](https://github.com/RangiLyu/nanodet) | +| NanoDet | yes | yes | [Github](https://github.com/RangiLyu/nanodet) | ### iOS: - 如果缺少模型请从 "android_TNN_Demo\app\src\main\assets" 复制 .tnnproto 和 .tnnmodel 文件到 "iOS_TNN_Demo\TNNDemo\res" 下。 @@ -42,14 +42,15 @@ 由于TNN官方还处于开发阶段,不同时间版本可能会出现功能异常或速度差距比较大都是正常的(当前版本功能正常,但速度变慢了)。 -懒人本地转换(不会上传模型): [xxxx -> tnn] https://convertmodel.com/ +懒人本地转换(不会上传模型): [xxxx -> tnn](https://convertmodel.com/) +轻量级OpenCV:[opencv-mobile](https://github.com/nihui/opencv-mobile) :art: 截图
| Android | iOS | |:-----:|:-----:| -|| None | +|| | > Android @@ -61,7 +62,7 @@ | YOLOv5s | NanoDet | |---------|---------| -| | None | +| | | 感谢:
diff --git a/Screenshots/iOS_CPU_or_GPU.jpg b/Screenshots/iOS_CPU_or_GPU.jpg new file mode 100644 index 0000000..d88e767 Binary files /dev/null and b/Screenshots/iOS_CPU_or_GPU.jpg differ diff --git a/Screenshots/iOS_iPhone5s_nanodet.jpg b/Screenshots/iOS_iPhone5s_nanodet.jpg new file mode 100644 index 0000000..d8a2d09 Binary files /dev/null and b/Screenshots/iOS_iPhone5s_nanodet.jpg differ diff --git a/iOS_TNN_Demo/TNNDemo.xcodeproj/project.pbxproj b/iOS_TNN_Demo/TNNDemo.xcodeproj/project.pbxproj index 5744295..7330ffc 100644 --- a/iOS_TNN_Demo/TNNDemo.xcodeproj/project.pbxproj +++ b/iOS_TNN_Demo/TNNDemo.xcodeproj/project.pbxproj @@ -20,10 +20,14 @@ 5F49BA2525ABF5A10049C8A3 /* WelcomeVC.m in Sources */ = {isa = PBXBuildFile; fileRef = 5F49BA2425ABF5A10049C8A3 /* WelcomeVC.m */; }; 5F49BA2B25ABF6640049C8A3 /* ELCameraBaseCapture.m in Sources */ = {isa = PBXBuildFile; fileRef = 5F49BA2825ABF6640049C8A3 /* ELCameraBaseCapture.m */; }; 5F49BA2C25ABF6640049C8A3 /* ELCameraControlCapture.m in Sources */ = {isa = PBXBuildFile; fileRef = 5F49BA2925ABF6640049C8A3 /* ELCameraControlCapture.m */; }; - 5F7F95E025AC2A340059E65F /* tnn.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 5F7F95DF25AC2A340059E65F /* tnn.framework */; }; - 5F7F95E225AC2A520059E65F /* tnn.bundle in Resources */ = {isa = PBXBuildFile; fileRef = 5F7F95E125AC2A510059E65F /* tnn.bundle */; }; + 5F5498A025FB73C5004A72DC /* opencv2.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 5F54989F25FB73C5004A72DC /* opencv2.framework */; }; + 5F7799EE25FB3FBD004A53A4 /* nanodet_sim_opt.tnnmodel in Resources */ = {isa = PBXBuildFile; fileRef = 5F7799EC25FB3FBD004A53A4 /* nanodet_sim_opt.tnnmodel */; }; + 5F7799EF25FB3FBD004A53A4 /* nanodet_sim_opt.tnnproto in Resources */ = {isa = PBXBuildFile; fileRef = 5F7799ED25FB3FBD004A53A4 /* nanodet_sim_opt.tnnproto */; }; + 5F7799F325FB3FDB004A53A4 /* NanoDet.mm in Sources */ = {isa = PBXBuildFile; fileRef = 5F7799F225FB3FDB004A53A4 /* NanoDet.mm */; }; 5F7F95E525AC2B510059E65F /* CoreML.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 5F7F95E425AC2B510059E65F /* CoreML.framework */; }; 5F7F95EF25AC33B10059E65F /* Accelerate.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 5F7F95EE25AC33B10059E65F /* Accelerate.framework */; }; + 5FE9786A25FEE7D200D8DF1C /* tnn.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 5FE9786925FEE7D200D8DF1C /* tnn.framework */; }; + 5FE9786C25FEE7D900D8DF1C /* tnn.bundle in Resources */ = {isa = PBXBuildFile; fileRef = 5FE9786B25FEE7D900D8DF1C /* tnn.bundle */; }; /* End PBXBuildFile section */ /* Begin PBXFileReference section */ @@ -49,10 +53,15 @@ 5F49BA2825ABF6640049C8A3 /* ELCameraBaseCapture.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = ELCameraBaseCapture.m; sourceTree = ""; }; 5F49BA2925ABF6640049C8A3 /* ELCameraControlCapture.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = ELCameraControlCapture.m; sourceTree = ""; }; 5F49BA2A25ABF6640049C8A3 /* ELCameraBaseCapture.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = ELCameraBaseCapture.h; sourceTree = ""; }; - 5F7F95DF25AC2A340059E65F /* tnn.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; path = tnn.framework; sourceTree = ""; }; - 5F7F95E125AC2A510059E65F /* tnn.bundle */ = {isa = PBXFileReference; lastKnownFileType = "wrapper.plug-in"; path = tnn.bundle; sourceTree = ""; }; + 5F54989F25FB73C5004A72DC /* opencv2.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; path = opencv2.framework; sourceTree = ""; }; + 5F7799EC25FB3FBD004A53A4 /* nanodet_sim_opt.tnnmodel */ = {isa = PBXFileReference; lastKnownFileType = file; path = nanodet_sim_opt.tnnmodel; sourceTree = ""; }; + 5F7799ED25FB3FBD004A53A4 /* nanodet_sim_opt.tnnproto */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = nanodet_sim_opt.tnnproto; sourceTree = ""; }; + 5F7799F125FB3FDB004A53A4 /* NanoDet.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = NanoDet.h; sourceTree = ""; }; + 5F7799F225FB3FDB004A53A4 /* NanoDet.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = NanoDet.mm; sourceTree = ""; }; 5F7F95E425AC2B510059E65F /* CoreML.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = CoreML.framework; path = System/Library/Frameworks/CoreML.framework; sourceTree = SDKROOT; }; 5F7F95EE25AC33B10059E65F /* Accelerate.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = Accelerate.framework; path = System/Library/Frameworks/Accelerate.framework; sourceTree = SDKROOT; }; + 5FE9786925FEE7D200D8DF1C /* tnn.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; path = tnn.framework; sourceTree = ""; }; + 5FE9786B25FEE7D900D8DF1C /* tnn.bundle */ = {isa = PBXFileReference; lastKnownFileType = "wrapper.plug-in"; path = tnn.bundle; sourceTree = ""; }; /* End PBXFileReference section */ /* Begin PBXFrameworksBuildPhase section */ @@ -61,8 +70,9 @@ buildActionMask = 2147483647; files = ( 5F7F95EF25AC33B10059E65F /* Accelerate.framework in Frameworks */, + 5FE9786A25FEE7D200D8DF1C /* tnn.framework in Frameworks */, 5F7F95E525AC2B510059E65F /* CoreML.framework in Frameworks */, - 5F7F95E025AC2A340059E65F /* tnn.framework in Frameworks */, + 5F5498A025FB73C5004A72DC /* opencv2.framework in Frameworks */, ); runOnlyForDeploymentPostprocessing = 0; }; @@ -72,6 +82,8 @@ 5F46C48D25ABFBF60051737C /* res */ = { isa = PBXGroup; children = ( + 5F7799EC25FB3FBD004A53A4 /* nanodet_sim_opt.tnnmodel */, + 5F7799ED25FB3FBD004A53A4 /* nanodet_sim_opt.tnnproto */, 5F46C48F25ABFC090051737C /* yolov5s_sim_opt.tnnmodel */, 5F46C48E25ABFC090051737C /* yolov5s_sim_opt.tnnproto */, ); @@ -98,8 +110,9 @@ 5F49BA0925ABF3730049C8A3 /* TNNDemo */ = { isa = PBXGroup; children = ( - 5F7F95E125AC2A510059E65F /* tnn.bundle */, - 5F7F95DF25AC2A340059E65F /* tnn.framework */, + 5FE9786B25FEE7D900D8DF1C /* tnn.bundle */, + 5FE9786925FEE7D200D8DF1C /* tnn.framework */, + 5F54989F25FB73C5004A72DC /* opencv2.framework */, 5F46C48D25ABFBF60051737C /* res */, 5F49BA2625ABF6640049C8A3 /* CameraCapture */, 5F49BA0A25ABF3730049C8A3 /* AppDelegate.h */, @@ -115,6 +128,8 @@ 5F49BA2425ABF5A10049C8A3 /* WelcomeVC.m */, 5F46C49625AC03A20051737C /* UIImage+Utility.h */, 5F46C49525AC03A20051737C /* UIImage+Utility.mm */, + 5F7799F125FB3FDB004A53A4 /* NanoDet.h */, + 5F7799F225FB3FDB004A53A4 /* NanoDet.mm */, 5F46C49325ABFC500051737C /* Yolov5.mm */, 5F46C49225ABFC500051737C /* Yolov5.h */, ); @@ -200,10 +215,12 @@ files = ( 5F49BA1A25ABF37C0049C8A3 /* LaunchScreen.storyboard in Resources */, 5F49BA1725ABF37C0049C8A3 /* Assets.xcassets in Resources */, - 5F7F95E225AC2A520059E65F /* tnn.bundle in Resources */, + 5FE9786C25FEE7D900D8DF1C /* tnn.bundle in Resources */, 5F49BA1525ABF3730049C8A3 /* Main.storyboard in Resources */, 5F46C49125ABFC090051737C /* yolov5s_sim_opt.tnnmodel in Resources */, + 5F7799EF25FB3FBD004A53A4 /* nanodet_sim_opt.tnnproto in Resources */, 5F46C49025ABFC090051737C /* yolov5s_sim_opt.tnnproto in Resources */, + 5F7799EE25FB3FBD004A53A4 /* nanodet_sim_opt.tnnmodel in Resources */, ); runOnlyForDeploymentPostprocessing = 0; }; @@ -218,6 +235,7 @@ 5F49BA1225ABF3730049C8A3 /* ViewController.mm in Sources */, 5F46C49725AC03A20051737C /* UIImage+Utility.mm in Sources */, 5F49BA2B25ABF6640049C8A3 /* ELCameraBaseCapture.m in Sources */, + 5F7799F325FB3FDB004A53A4 /* NanoDet.mm in Sources */, 5F46C49425ABFC500051737C /* Yolov5.mm in Sources */, 5F49BA0C25ABF3730049C8A3 /* AppDelegate.m in Sources */, 5F49BA1D25ABF37C0049C8A3 /* main.m in Sources */, @@ -361,12 +379,15 @@ isa = XCBuildConfiguration; buildSettings = { ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon; + CODE_SIGN_IDENTITY = "Apple Development"; CODE_SIGN_STYLE = Automatic; DEVELOPMENT_TEAM = 7RB5UWYX7Z; + ENABLE_BITCODE = NO; FRAMEWORK_SEARCH_PATHS = ( "$(inherited)", "$(PROJECT_DIR)/TNNDemo", ); + HEADER_SEARCH_PATHS = ""; INFOPLIST_FILE = TNNDemo/Info.plist; LD_RUNPATH_SEARCH_PATHS = ( "$(inherited)", @@ -378,6 +399,7 @@ ); PRODUCT_BUNDLE_IDENTIFIER = com.teng.TNNDemo; PRODUCT_NAME = "$(TARGET_NAME)"; + PROVISIONING_PROFILE_SPECIFIER = ""; TARGETED_DEVICE_FAMILY = "1,2"; VALID_ARCHS = "arm64 arm64e armv7 armv7s"; }; @@ -387,12 +409,15 @@ isa = XCBuildConfiguration; buildSettings = { ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon; + CODE_SIGN_IDENTITY = "Apple Development"; CODE_SIGN_STYLE = Automatic; DEVELOPMENT_TEAM = 7RB5UWYX7Z; + ENABLE_BITCODE = NO; FRAMEWORK_SEARCH_PATHS = ( "$(inherited)", "$(PROJECT_DIR)/TNNDemo", ); + HEADER_SEARCH_PATHS = ""; INFOPLIST_FILE = TNNDemo/Info.plist; LD_RUNPATH_SEARCH_PATHS = ( "$(inherited)", @@ -404,6 +429,7 @@ ); PRODUCT_BUNDLE_IDENTIFIER = com.teng.TNNDemo; PRODUCT_NAME = "$(TARGET_NAME)"; + PROVISIONING_PROFILE_SPECIFIER = ""; TARGETED_DEVICE_FAMILY = "1,2"; VALID_ARCHS = "arm64 arm64e armv7 armv7s"; }; diff --git a/iOS_TNN_Demo/TNNDemo/Info.plist b/iOS_TNN_Demo/TNNDemo/Info.plist index 8d48265..52a8913 100644 --- a/iOS_TNN_Demo/TNNDemo/Info.plist +++ b/iOS_TNN_Demo/TNNDemo/Info.plist @@ -2,10 +2,6 @@ - NSPhotoLibraryUsageDescription - 不让用就再见吧 - NSCameraUsageDescription - 不给就没的玩了 CFBundleDevelopmentRegion $(DEVELOPMENT_LANGUAGE) CFBundleExecutable @@ -24,6 +20,10 @@ 1 LSRequiresIPhoneOS + NSCameraUsageDescription + 不给就没的玩了 + NSPhotoLibraryUsageDescription + 不让用就再见吧 UILaunchStoryboardName LaunchScreen UIMainStoryboardFile diff --git a/iOS_TNN_Demo/TNNDemo/NanoDet.h b/iOS_TNN_Demo/TNNDemo/NanoDet.h new file mode 100644 index 0000000..2326390 --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/NanoDet.h @@ -0,0 +1,103 @@ +// +// NanoDet.h +// TNNDemo +// +// Created by WZTENG on 2021/1/11. +// Copyright © 2021 TENG. All rights reserved. +// + +#ifndef NANODET_H +#define NANODET_H + +#include +#import +#import +#import +#import + +#include "tnn/core/tnn.h" +#include "tnn/core/blob.h" +#include "tnn/utils/blob_converter.h" +#include "tnn/utils/mat_utils.h" + +#include "UIImage+Utility.h" +#include + +#include "Yolov5.h" + +typedef struct { + std::string cls_layer; + std::string dis_layer; + int stride; +} HeadInfo; + +//typedef struct { +// float x1; +// float y1; +// float x2; +// float y2; +// float score; +// int label; +//} BoxInfo; + +class NanoDet { +public: + NanoDet(bool useGPU); + + ~NanoDet(); + + std::vector detect(UIImage *image, float threshold, float nms_threshold); + + std::vector labels{"person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light", + "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow", + "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee", + "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard", + "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple", + "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch", + "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone", + "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear", + "hair drier", "toothbrush"}; + +private: + void decode_infer(const std::shared_ptr& cls_pred, + const std::shared_ptr& dis_pred, + int stride, float threshold, std::vector> &results); + + BoxInfo disPred2Bbox(const float *&dfl_det, int label, float score, int x, int y, int stride); + + void nms(std::vector &input_boxes, float NMS_THRESH); + +private: + std::shared_ptr net; + std::shared_ptr instance; + TNN_NS::DeviceType device_type; + + int in_n = 1; + int in_c = 3; + int in_w = 320; + int in_h = 320; + + const int num_class = 80; + const int reg_max = 7; + + std::vector heads_info{ + // cls_pred | dis_pred | stride + {"792", "795", 8}, + {"814", "817", 16}, + {"836", "839", 32}, + }; + +public: + static NanoDet *detector; + static bool hasGPU; + static bool toUseGPU; +}; + +template +int activation_function_softmax(const _Tp *src, _Tp *dst, int length); + +inline float fast_exp(float x); + +inline float sigmoid(float x); + +#endif //NANODET_H diff --git a/iOS_TNN_Demo/TNNDemo/NanoDet.mm b/iOS_TNN_Demo/TNNDemo/NanoDet.mm new file mode 100644 index 0000000..c8c1058 --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/NanoDet.mm @@ -0,0 +1,310 @@ +#include "NanoDet.h" +#include + +//#include +#include +#include +#include +#include + + +NanoDet *NanoDet::detector = nullptr; +bool NanoDet::hasGPU = true; +bool NanoDet::toUseGPU = true; + +//std::shared_ptr loadModelToInfo() { +// auto modelInfo = std::make_shared(); +// +// auto library_bundle_path = [[NSBundle mainBundle] pathForResource:@"tnn" ofType:@"bundle"]; +// auto library_bundle = [NSBundle bundleWithPath:library_bundle_path]; +// auto library_path = [library_bundle pathForResource:@"tnn" ofType:@"metallib"]; +// +// auto proto_path = [[NSBundle mainBundle] pathForResource:@"nanodet_sim_opt.tnnproto" ofType:nil]; +// auto model_path = [[NSBundle mainBundle] pathForResource:@"nanodet_sim_opt.tnnmodel" ofType:nil]; +// if (proto_path.length <= 0 || model_path.length <= 0) { +// NSLog(@"Error: proto or model path is invalid"); +// return modelInfo; +// } +// std::string proto_content = [NSString stringWithContentsOfFile:proto_path encoding:NSUTF8StringEncoding error:nil].UTF8String; +// NSData *data_mode = [NSData dataWithContentsOfFile:model_path]; +// std::string model_content = [data_mode length] > 0 ? std::string((const char *)[data_mode bytes], [data_mode length]) : ""; +// if (proto_content.size() <= 0 || model_content.size() <= 0) { +// NSLog(@"Error: proto or model path is invalid"); +// return modelInfo; +// } +// modelInfo->library_path = library_path.UTF8String; +// modelInfo->proto_content = proto_content; +// modelInfo->model_content = model_content; +// +// return modelInfo; +//} + +cv::Mat cvMatWithImage(UIImage *image) { + CGColorSpaceRef colorSpace = CGImageGetColorSpace(image.CGImage); + CGFloat cols = image.size.width; + CGFloat rows = image.size.height; + + cv::Mat cvMat(rows, cols, CV_8UC4); + CGContextRef contextRef = CGBitmapContextCreate(cvMat.data, cols, rows, 8, cvMat.step[0], colorSpace, kCGImageAlphaNoneSkipLast | kCGBitmapByteOrderDefault); + + CGContextDrawImage(contextRef, CGRectMake(0, 0, cols, rows), image.CGImage); + CGContextRelease(contextRef); + return cvMat; +} + +NanoDet::NanoDet(bool useGPU) { + if (NanoDet::net == nullptr) { + auto modelInfo = loadModelToInfo("nanodet_sim_opt.tnnproto", "nanodet_sim_opt.tnnmodel"); + + TNN_NS::Status status; + TNN_NS::ModelConfig config; + config.model_type = TNN_NS::MODEL_TYPE_TNN; + config.params = {modelInfo->proto_content, modelInfo->model_content}; + auto net = std::make_shared(); + status = net->Init(config); + NanoDet::net = net; + + NanoDet::device_type = useGPU ? TNN_NS::DEVICE_METAL : TNN_NS::DEVICE_ARM; + + TNN_NS::InputShapesMap shapeMap; + TNN_NS::NetworkConfig network_config; + network_config.library_path = {modelInfo->library_path}; + network_config.device_type = NanoDet::device_type; + auto ins = NanoDet::net->CreateInst(network_config, status, shapeMap); + if (status != TNN_NS::TNN_OK || !ins) { + NSLog(@"GPU initialization failed, switch to CPU"); + // 如果出现GPU加载失败,切换到CPU + NanoDet::device_type = TNN_NS::DEVICE_ARM; + network_config.device_type = TNN_NS::DEVICE_ARM; + ins = NanoDet::net->CreateInst(network_config, status, shapeMap); + } + NanoDet::instance = ins; + + if (status != TNN_NS::TNN_OK) { + NSLog(@"TNN init failed %d", (int) status); + return; + } + } +} + +NanoDet::~NanoDet() { + NanoDet::instance = nullptr; + NanoDet::net = nullptr; +} + +std::vector NanoDet::detect(UIImage *image, float threshold, float nms_threshold) { + std::vector results; + + int image_h = image.size.height; + int image_w = image.size.width; + float scale_w = 1.0f * in_w / image_w; + float scale_h = 1.0f * in_h / image_h; + + auto imageSource = utility::UIImageGetData(image); + + // 格式转换 RGBA -> BGRA + cv::Mat dst(image_h, image_w, CV_8UC4); + cv::Mat src(image_h, image_w, CV_8UC4, imageSource.get()); + cvtColor(src, dst, cv::COLOR_RGBA2BGRA); + + // 原始图片 + TNN_NS::DeviceType dt = TNN_NS::DEVICE_ARM; // 当前数据来源始终位于CPU,不需要设置成OPENCL,tnn自动复制cpu->gpu + TNN_NS::DimsVector image_dims = {1, 4, image_h, image_w}; + auto input_mat = std::make_shared(dt, TNN_NS::N8UC4, image_dims, dst.data); // imageSource(RGBA) or dst.data(BGRA) + // 模型输入 + TNN_NS::DimsVector target_dims = {1, 4, in_h, in_w}; + auto resize_mat = std::make_shared(dt, TNN_NS::N8UC4, target_dims); + // opencl需要设置queue + void *command_queue = nullptr; + auto status = NanoDet::instance->GetCommandQueue(&command_queue); + if (status != TNN_NS::TNN_OK) { + NSLog(@"MatUtils::GetCommandQueue Error: %s", status.description().c_str()); + } + // 转换大小 + TNN_NS::ResizeParam param; + param.type = TNN_NS::InterpType::INTERP_TYPE_NEAREST; + status = TNN_NS::MatUtils::Resize(*input_mat, *resize_mat, param, command_queue); + if (status != TNN_NS::TNN_OK) { + NSLog(@"MatUtils::Resize Error: %s", status.description().c_str()); + } + + // 输入数据 + TNN_NS::MatConvertParam input_cvt_param; + input_cvt_param.scale = {0.017429f, 0.017507f, 0.017125f, 0.0}; + input_cvt_param.bias = {-(103.53f * 0.017429f), -(116.28f * 0.017507f), -(123.675f * 0.017125f), 0.0}; + status = NanoDet::instance->SetInputMat(resize_mat, input_cvt_param); + if (status != TNN_NS::TNN_OK) { + NSLog(@"instance.SetInputMat Error: %s", status.description().c_str()); + } + + // 前向 +// TNN_NS::Callback callback; +// status = NanoDet::instance->ForwardAsync(callback); + status = NanoDet::instance->Forward(); + if (status != TNN_NS::TNN_OK) { + NSLog(@"instance.Forward Error: %s", status.description().c_str()); + } + + std::vector> resultx; + resultx.resize(num_class); + // 获取数据 + for (const auto &head_info : heads_info) { + TNN_NS::MatConvertParam clsPparam; + std::shared_ptr cls_pred; + status = NanoDet::instance->GetOutputMat(cls_pred, clsPparam, head_info.cls_layer); + if (status != TNN_NS::TNN_OK) { + NSLog(@"instance.GetOutputMat Error:cls_pred %s", status.description().c_str()); + } + //TLOGD("===============> %d %d %d %d", cls_pred->GetDims()[0], cls_pred->GetDims()[1], cls_pred->GetDims()[2], cls_pred->GetDims()[3]); + + TNN_NS::MatConvertParam disParam; + std::shared_ptr dis_pred; + status = NanoDet::instance->GetOutputMat(dis_pred, disParam, head_info.dis_layer); + if (status != TNN_NS::TNN_OK) { + NSLog(@"instance.GetOutputMat Error:dis_pred %s", status.description().c_str()); + } + //TLOGD("===============> %d %d %d %d", dis_pred->GetDims()[0], dis_pred->GetDims()[1], dis_pred->GetDims()[2], dis_pred->GetDims()[3]); + + // 后处理 + decode_infer(cls_pred, dis_pred, head_info.stride, threshold, resultx); + } + + for (int i = 0; i < resultx.size(); i++) { + nms(resultx[i], nms_threshold); + for (auto box : resultx[i]) { + box.x1 = box.x1 / scale_w; + box.x2 = box.x2 / scale_w; + box.y1 = box.y1 / scale_h; + box.y2 = box.y2 / scale_h; + results.push_back(box); + } + } + +// NSLog(@"object size:%ld", results.size()); + + if (status != TNN_NS::TNN_OK) { + NSLog(@"get outputmat fail"); + return results; + } + + return results; +} + +void NanoDet::decode_infer(const std::shared_ptr &cls_pred, const std::shared_ptr &dis_pred, int stride, float threshold, std::vector> &results) { + int feature_h = in_h / stride; + int feature_w = in_w / stride; + + TNN_NS::MatType matType = cls_pred->GetMatType(); // NCHW_float + std::vector dims = cls_pred->GetDims(); + //cv::Mat debug_heatmap = cv::Mat(feature_h, feature_w, CV_8UC3); + for (int idx = 0; idx < feature_h * feature_w; idx++) { + // scores is a tensor with shape [feature_h * feature_w, num_class] +// const float *scores = cls_pred->host() + (idx * num_class); + auto *scoreTemp = static_cast(cls_pred->GetData()); + const float *scores = scoreTemp + (idx * num_class); + + int row = idx / feature_w; + int col = idx % feature_w; + float score = 0; + int cur_label = 0; + for (int label = 0; label < num_class; label++) { + if (scores[label] > score) { + score = scores[label]; + cur_label = label; + } + } + if (score > threshold && score <= 1.0) { +// NSLog(@"score: %f %s", score, labels[cur_label].c_str()); + // bbox is a tensor with shape [feature_h * feature_w, 4_points * 8_distribution_bite] +// const float *bbox_pred = dis_pred->host() + (idx * 4 * (reg_max + 1)); + auto *bboxTemp = static_cast(dis_pred->GetData()); + const float *bbox_pred = bboxTemp + (idx * 4 * (reg_max + 1)); + BoxInfo boxInfo = disPred2Bbox(bbox_pred, cur_label, score, col, row, stride); + results[cur_label].push_back(boxInfo); + //debug_heatmap.at(row, col)[0] = 255; + } + } +} + +BoxInfo NanoDet::disPred2Bbox(const float *&dfl_det, int label, float score, int x, int y, int stride) { + float ct_x = (x + 0.5f) * stride; + float ct_y = (y + 0.5f) * stride; + std::vector dis_pred; + dis_pred.resize(4); + for (int i = 0; i < 4; i++) { + float dis = 0; + auto *dis_after_sm = new float[reg_max + 1]; + activation_function_softmax(dfl_det + i * (reg_max + 1), dis_after_sm, reg_max + 1); + for (int j = 0; j < reg_max + 1; j++) { + dis += j * dis_after_sm[j]; + } + dis *= stride; + dis_pred[i] = dis; + delete[] dis_after_sm; + } + float xmin = (std::max)(ct_x - dis_pred[0], .0f); + float ymin = (std::max)(ct_y - dis_pred[1], .0f); + float xmax = (std::min)(ct_x + dis_pred[2], (float) in_w); + float ymax = (std::min)(ct_y + dis_pred[3], (float) in_h); + + return BoxInfo{xmin, ymin, xmax, ymax, score, label}; +} + +void NanoDet::nms(std::vector &input_boxes, float NMS_THRESH) { + std::sort(input_boxes.begin(), input_boxes.end(), + [](BoxInfo a, BoxInfo b) { return a.score > b.score; } + ); + std::vector vArea(input_boxes.size()); + for (int i = 0; i < int(input_boxes.size()); ++i) { + vArea[i] = (input_boxes.at(i).x2 - input_boxes.at(i).x1 + 1) + * (input_boxes.at(i).y2 - input_boxes.at(i).y1 + 1); + } + for (int i = 0; i < int(input_boxes.size()); ++i) { + for (int j = i + 1; j < int(input_boxes.size());) { + float xx1 = (std::max)(input_boxes[i].x1, input_boxes[j].x1); + float yy1 = (std::max)(input_boxes[i].y1, input_boxes[j].y1); + float xx2 = (std::min)(input_boxes[i].x2, input_boxes[j].x2); + float yy2 = (std::min)(input_boxes[i].y2, input_boxes[j].y2); + float w = (std::max)(float(0), xx2 - xx1 + 1); + float h = (std::max)(float(0), yy2 - yy1 + 1); + float inter = w * h; + float ovr = inter / (vArea[i] + vArea[j] - inter); + if (ovr >= NMS_THRESH) { + input_boxes.erase(input_boxes.begin() + j); + vArea.erase(vArea.begin() + j); + } else { + j++; + } + } + } +} + +inline float fast_exp(float x) { + union { + uint32_t i; + float f; + } v{}; + v.i = (1 << 23) * (1.4426950409 * x + 126.93490512f); + return v.f; +} + +inline float sigmoid(float x) { + return 1.0f / (1.0f + fast_exp(-x)); +} + +template +int activation_function_softmax(const _Tp *src, _Tp *dst, int length) { + const _Tp alpha = *std::max_element(src, src + length); + _Tp denominator{0}; + + for (int i = 0; i < length; ++i) { + dst[i] = fast_exp(src[i] - alpha); + denominator += dst[i]; + } + + for (int i = 0; i < length; ++i) { + dst[i] /= denominator; + } + + return 0; +} diff --git a/iOS_TNN_Demo/TNNDemo/UIImage+Utility.h b/iOS_TNN_Demo/TNNDemo/UIImage+Utility.h index 537a94e..040d069 100644 --- a/iOS_TNN_Demo/TNNDemo/UIImage+Utility.h +++ b/iOS_TNN_Demo/TNNDemo/UIImage+Utility.h @@ -33,7 +33,7 @@ std::shared_ptr UIImageGetData(UIImage *image); * 1:resize fit the view and keep aspect, empty space may be remained zero * 2:resize to fill the view and keep aspect, no empty space remain */ -std::shared_ptr UIImageGetData(UIImage *image, int height, int width, int gravity = 0); +std::shared_ptr UIImageGetData(UIImage *image, int height, int width, int gravity); UIImage * UIImageCrop(UIImage *image, CGRect rect); diff --git a/iOS_TNN_Demo/TNNDemo/ViewController.h b/iOS_TNN_Demo/TNNDemo/ViewController.h index 6501854..6592dbf 100644 --- a/iOS_TNN_Demo/TNNDemo/ViewController.h +++ b/iOS_TNN_Demo/TNNDemo/ViewController.h @@ -9,6 +9,7 @@ #import #define W_YOLOV5S 1 +#define W_NANODET 2 @interface ViewController : UIViewController diff --git a/iOS_TNN_Demo/TNNDemo/ViewController.mm b/iOS_TNN_Demo/TNNDemo/ViewController.mm index edd81f0..4abfe3d 100644 --- a/iOS_TNN_Demo/TNNDemo/ViewController.mm +++ b/iOS_TNN_Demo/TNNDemo/ViewController.mm @@ -14,7 +14,8 @@ #include #include -#include "YoloV5.h" +#include "Yolov5.h" +#include "NanoDet.h" #import #import @@ -40,6 +41,7 @@ @interface ViewController () result; if (self.USE_MODEL == W_YOLOV5S) { result = self.yolo->detect(image, self.threshold, self.nms_threshold); + } else if (self.USE_MODEL == W_NANODET) { + result = self.nanodet->detect(image, self.threshold, self.nms_threshold); } __weak typeof(self) weakSelf = self; dispatch_sync(dispatch_get_main_queue(), ^{ - if (weakSelf.USE_MODEL == W_YOLOV5S) { + if (weakSelf.USE_MODEL == W_YOLOV5S || weakSelf.USE_MODEL == W_NANODET) { weakSelf.imageView.image = [weakSelf drawBox:weakSelf.imageView image:image boxs:result]; } @@ -302,12 +313,16 @@ - (void)createModel { if (!self.yolo && self.USE_MODEL == W_YOLOV5S) { NSLog(@"new YoloV5"); self.yolo = new YoloV5(self.USE_GPU); + } else if (!self.nanodet && self.USE_MODEL == W_NANODET) { + NSLog(@"new NanoDet"); + self.nanodet = new NanoDet(self.USE_GPU); } } - (void)releaseModel { NSLog(@"release model"); delete self.yolo; + delete self.nanodet; } #pragma mark 获取模型名称 @@ -315,6 +330,8 @@ - (NSString *)getModelName { NSString *name = @"ohhhhh"; if (self.USE_MODEL == W_YOLOV5S) { name = @"YOLOv5s"; + } else if (self.USE_MODEL == W_NANODET) { + name = @"NanoDet"; } NSString *modelName = self.USE_GPU ? [NSString stringWithFormat:@"[GPU] %@", name] : [NSString stringWithFormat:@"[CPU] %@", name]; return modelName; @@ -337,6 +354,8 @@ - (UIImage *)drawBox:(UIImageView *)imageView image:(UIImage *)image boxs:(std:: NSString *label = nil; if (self.USE_MODEL == W_YOLOV5S) { label = [NSString stringWithFormat:@"%s %.3f", self.yolo->labels[box.label].c_str(), box.score]; + } else if (self.USE_MODEL == W_NANODET) { + label = [NSString stringWithFormat:@"%s %.3f", self.nanodet->labels[box.label].c_str(), box.score]; } [label drawAtPoint:CGPointMake(box.x1 + 2, box.y1 - 3) withAttributes:@{NSFontAttributeName:[UIFont systemFontOfSize:20], NSParagraphStyleAttributeName:style, NSForegroundColorAttributeName:color}]; diff --git a/iOS_TNN_Demo/TNNDemo/WelcomeVC.m b/iOS_TNN_Demo/TNNDemo/WelcomeVC.m index 7410190..2e531d1 100644 --- a/iOS_TNN_Demo/TNNDemo/WelcomeVC.m +++ b/iOS_TNN_Demo/TNNDemo/WelcomeVC.m @@ -12,6 +12,7 @@ @interface WelcomeVC () @property (strong, nonatomic) IBOutlet UIButton *btnYolov5s; +@property (strong, nonatomic) IBOutlet UIButton *btnNanoDet; @property (strong, nonatomic) UIScrollView *scrollView; @property (strong, nonatomic) UIView *boxView; @@ -70,7 +71,7 @@ - (void)initView { int offsetY = self.view.bounds.size.width * 0.6f; int btnHeight = 35; int btnY = 35; - int btnCount = 1; + int btnCount = 2; int i = 0; self.boxView = [[UIView alloc] initWithFrame:CGRectMake(self.view.bounds.origin.x, self.view.bounds.origin.y, self.view.bounds.size.width, offsetY + btnHeight * btnCount)]; @@ -87,6 +88,12 @@ - (void)initView { [_btnYolov5s addTarget:self action:@selector(pressYolov5s:) forControlEvents:UIControlEventTouchUpInside]; [self.boxView addSubview:_btnYolov5s]; + _btnNanoDet = [[UIButton alloc] initWithFrame:CGRectMake(0, offsetY + btnY * i++, btnWidth, btnHeight)]; + [_btnNanoDet setTitleColor:[UIColor blueColor] forState:UIControlStateNormal]; + [_btnNanoDet setTitle:@"NanoDet" forState:UIControlStateNormal]; + [_btnNanoDet addTarget:self action:@selector(pressNanoDet:) forControlEvents:UIControlEventTouchUpInside]; + [self.boxView addSubview:_btnNanoDet]; + self.scrollView = [[UIScrollView alloc] initWithFrame:self.view.bounds]; self.scrollView.contentSize = self.boxView.frame.size; [self.scrollView addSubview:self.boxView]; @@ -101,6 +108,13 @@ - (void)pressYolov5s:(UIButton *)btn { [self.navigationController pushViewController:vc animated:NO]; } +- (void)pressNanoDet:(UIButton *)btn { + ViewController *vc = [self.storyboard instantiateViewControllerWithIdentifier:@"ViewController"]; + vc.USE_MODEL = W_NANODET; + vc.USE_GPU = self.useGPU; + [self.navigationController pushViewController:vc animated:NO]; +} + /* #pragma mark - Navigation diff --git a/iOS_TNN_Demo/TNNDemo/Yolov5.h b/iOS_TNN_Demo/TNNDemo/Yolov5.h index b513037..a011e64 100644 --- a/iOS_TNN_Demo/TNNDemo/Yolov5.h +++ b/iOS_TNN_Demo/TNNDemo/Yolov5.h @@ -96,4 +96,6 @@ class YoloV5 { static bool toUseGPU; }; +std::shared_ptr loadModelToInfo(std::string proto, std::string model); + #endif //YOLOV5S_H diff --git a/iOS_TNN_Demo/TNNDemo/Yolov5.mm b/iOS_TNN_Demo/TNNDemo/Yolov5.mm index 83bcf1b..23f382c 100644 --- a/iOS_TNN_Demo/TNNDemo/Yolov5.mm +++ b/iOS_TNN_Demo/TNNDemo/Yolov5.mm @@ -12,15 +12,17 @@ bool YoloV5::hasGPU = true; bool YoloV5::toUseGPU = true; -std::shared_ptr loadModelToInfo() { +std::shared_ptr loadModelToInfo(std::string proto, std::string model) { auto modelInfo = std::make_shared(); auto library_bundle_path = [[NSBundle mainBundle] pathForResource:@"tnn" ofType:@"bundle"]; auto library_bundle = [NSBundle bundleWithPath:library_bundle_path]; auto library_path = [library_bundle pathForResource:@"tnn" ofType:@"metallib"]; - auto proto_path = [[NSBundle mainBundle] pathForResource:@"yolov5s_sim_opt.tnnproto" ofType:nil]; - auto model_path = [[NSBundle mainBundle] pathForResource:@"yolov5s_sim_opt.tnnmodel" ofType:nil]; + NSString *proto_t = [NSString stringWithCString:proto.c_str() encoding:[NSString defaultCStringEncoding]]; + NSString *model_t = [NSString stringWithCString:model.c_str() encoding:[NSString defaultCStringEncoding]]; + auto proto_path = [[NSBundle mainBundle] pathForResource:proto_t ofType:nil]; + auto model_path = [[NSBundle mainBundle] pathForResource:model_t ofType:nil]; if (proto_path.length <= 0 || model_path.length <= 0) { NSLog(@"Error: proto or model path is invalid"); return modelInfo; @@ -42,7 +44,7 @@ YoloV5::YoloV5(bool useGPU) { if (YoloV5::net == nullptr) { // 加载模型 - auto modelInfo = loadModelToInfo(); + auto modelInfo = loadModelToInfo("yolov5s_sim_opt.tnnproto", "yolov5s_sim_opt.tnnmodel"); TNN_NS::Status status; TNN_NS::ModelConfig config; diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Headers b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Headers new file mode 100644 index 0000000..a177d2a --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Headers @@ -0,0 +1 @@ +Versions/Current/Headers \ No newline at end of file diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Resources b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Resources new file mode 100644 index 0000000..953ee36 --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Resources @@ -0,0 +1 @@ +Versions/Current/Resources \ No newline at end of file diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core.hpp b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core.hpp new file mode 100644 index 0000000..50af505 --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core.hpp @@ -0,0 +1,3308 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2015, Intel Corporation, all rights reserved. +// Copyright (C) 2009-2011, Willow Garage Inc., all rights reserved. +// Copyright (C) 2015, OpenCV Foundation, all rights reserved. +// Copyright (C) 2015, Itseez Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef OPENCV_CORE_HPP +#define OPENCV_CORE_HPP + +#ifndef __cplusplus +# error core.hpp header must be compiled as C++ +#endif + +#include "opencv2/core/cvdef.h" +#include "opencv2/core/version.hpp" +#include "opencv2/core/base.hpp" +#include "opencv2/core/cvstd.hpp" +#include "opencv2/core/traits.hpp" +#include "opencv2/core/matx.hpp" +#include "opencv2/core/types.hpp" +#include "opencv2/core/mat.hpp" +#include "opencv2/core/persistence.hpp" + +/** +@defgroup core Core functionality +@{ + @defgroup core_basic Basic structures + @defgroup core_c C structures and operations + @{ + @defgroup core_c_glue Connections with C++ + @} + @defgroup core_array Operations on arrays + @defgroup core_async Asynchronous API + @defgroup core_xml XML/YAML Persistence + @defgroup core_cluster Clustering + @defgroup core_utils Utility and system functions and macros + @{ + @defgroup core_logging Logging facilities + @defgroup core_utils_sse SSE utilities + @defgroup core_utils_neon NEON utilities + @defgroup core_utils_vsx VSX utilities + @defgroup core_utils_softfloat Softfloat support + @defgroup core_utils_samples Utility functions for OpenCV samples + @} + @defgroup core_opengl OpenGL interoperability + @defgroup core_ipp Intel IPP Asynchronous C/C++ Converters + @defgroup core_optim Optimization Algorithms + @defgroup core_directx DirectX interoperability + @defgroup core_eigen Eigen support + @defgroup core_opencl OpenCL support + @defgroup core_va_intel Intel VA-API/OpenCL (CL-VA) interoperability + @defgroup core_hal Hardware Acceleration Layer + @{ + @defgroup core_hal_functions Functions + @defgroup core_hal_interface Interface + @defgroup core_hal_intrin Universal intrinsics + @{ + @defgroup core_hal_intrin_impl Private implementation helpers + @} + @defgroup core_lowlevel_api Low-level API for external libraries / plugins + @} +@} + */ + +namespace cv { + +//! @addtogroup core_utils +//! @{ + +/*! @brief Class passed to an error. + +This class encapsulates all or almost all necessary +information about the error happened in the program. The exception is +usually constructed and thrown implicitly via CV_Error and CV_Error_ macros. +@see error + */ +class CV_EXPORTS Exception : public std::exception +{ +public: + /*! + Default constructor + */ + Exception(); + /*! + Full constructor. Normally the constructor is not called explicitly. + Instead, the macros CV_Error(), CV_Error_() and CV_Assert() are used. + */ + Exception(int _code, const String& _err, const String& _func, const String& _file, int _line); + virtual ~Exception() throw(); + + /*! + \return the error description and the context as a text string. + */ + virtual const char *what() const throw() CV_OVERRIDE; + void formatMessage(); + + String msg; ///< the formatted error message + + int code; ///< error code @see CVStatus + String err; ///< error description + String func; ///< function name. Available only when the compiler supports getting it + String file; ///< source file name where the error has occurred + int line; ///< line number in the source file where the error has occurred +}; + +/*! @brief Signals an error and raises the exception. + +By default the function prints information about the error to stderr, +then it either stops if cv::setBreakOnError() had been called before or raises the exception. +It is possible to alternate error processing by using #redirectError(). +@param exc the exception raisen. +@deprecated drop this version + */ +CV_EXPORTS CV_NORETURN void error(const Exception& exc); + +enum SortFlags { SORT_EVERY_ROW = 0, //!< each matrix row is sorted independently + SORT_EVERY_COLUMN = 1, //!< each matrix column is sorted + //!< independently; this flag and the previous one are + //!< mutually exclusive. + SORT_ASCENDING = 0, //!< each matrix row is sorted in the ascending + //!< order. + SORT_DESCENDING = 16 //!< each matrix row is sorted in the + //!< descending order; this flag and the previous one are also + //!< mutually exclusive. + }; + +//! @} core_utils + +//! @addtogroup core +//! @{ + +//! Covariation flags +enum CovarFlags { + /** The output covariance matrix is calculated as: + \f[\texttt{scale} \cdot [ \texttt{vects} [0]- \texttt{mean} , \texttt{vects} [1]- \texttt{mean} ,...]^T \cdot [ \texttt{vects} [0]- \texttt{mean} , \texttt{vects} [1]- \texttt{mean} ,...],\f] + The covariance matrix will be nsamples x nsamples. Such an unusual covariance matrix is used + for fast PCA of a set of very large vectors (see, for example, the EigenFaces technique for + face recognition). Eigenvalues of this "scrambled" matrix match the eigenvalues of the true + covariance matrix. The "true" eigenvectors can be easily calculated from the eigenvectors of + the "scrambled" covariance matrix. */ + COVAR_SCRAMBLED = 0, + /**The output covariance matrix is calculated as: + \f[\texttt{scale} \cdot [ \texttt{vects} [0]- \texttt{mean} , \texttt{vects} [1]- \texttt{mean} ,...] \cdot [ \texttt{vects} [0]- \texttt{mean} , \texttt{vects} [1]- \texttt{mean} ,...]^T,\f] + covar will be a square matrix of the same size as the total number of elements in each input + vector. One and only one of #COVAR_SCRAMBLED and #COVAR_NORMAL must be specified.*/ + COVAR_NORMAL = 1, + /** If the flag is specified, the function does not calculate mean from + the input vectors but, instead, uses the passed mean vector. This is useful if mean has been + pre-calculated or known in advance, or if the covariance matrix is calculated by parts. In + this case, mean is not a mean vector of the input sub-set of vectors but rather the mean + vector of the whole set.*/ + COVAR_USE_AVG = 2, + /** If the flag is specified, the covariance matrix is scaled. In the + "normal" mode, scale is 1./nsamples . In the "scrambled" mode, scale is the reciprocal of the + total number of elements in each input vector. By default (if the flag is not specified), the + covariance matrix is not scaled ( scale=1 ).*/ + COVAR_SCALE = 4, + /** If the flag is + specified, all the input vectors are stored as rows of the samples matrix. mean should be a + single-row vector in this case.*/ + COVAR_ROWS = 8, + /** If the flag is + specified, all the input vectors are stored as columns of the samples matrix. mean should be a + single-column vector in this case.*/ + COVAR_COLS = 16 +}; + +//! @addtogroup core_cluster +//! @{ + +//! k-Means flags +enum KmeansFlags { + /** Select random initial centers in each attempt.*/ + KMEANS_RANDOM_CENTERS = 0, + /** Use kmeans++ center initialization by Arthur and Vassilvitskii [Arthur2007].*/ + KMEANS_PP_CENTERS = 2, + /** During the first (and possibly the only) attempt, use the + user-supplied labels instead of computing them from the initial centers. For the second and + further attempts, use the random or semi-random centers. Use one of KMEANS_\*_CENTERS flag + to specify the exact method.*/ + KMEANS_USE_INITIAL_LABELS = 1 +}; + +//! @} core_cluster + +//! @addtogroup core_array +//! @{ + +enum ReduceTypes { REDUCE_SUM = 0, //!< the output is the sum of all rows/columns of the matrix. + REDUCE_AVG = 1, //!< the output is the mean vector of all rows/columns of the matrix. + REDUCE_MAX = 2, //!< the output is the maximum (column/row-wise) of all rows/columns of the matrix. + REDUCE_MIN = 3 //!< the output is the minimum (column/row-wise) of all rows/columns of the matrix. + }; + +//! @} core_array + +/** @brief Swaps two matrices +*/ +CV_EXPORTS void swap(Mat& a, Mat& b); +/** @overload */ +CV_EXPORTS void swap( UMat& a, UMat& b ); + +//! @} core + +//! @addtogroup core_array +//! @{ + +/** @brief Computes the source location of an extrapolated pixel. + +The function computes and returns the coordinate of a donor pixel corresponding to the specified +extrapolated pixel when using the specified extrapolation border mode. For example, if you use +cv::BORDER_WRAP mode in the horizontal direction, cv::BORDER_REFLECT_101 in the vertical direction and +want to compute value of the "virtual" pixel Point(-5, 100) in a floating-point image img , it +looks like: +@code{.cpp} + float val = img.at(borderInterpolate(100, img.rows, cv::BORDER_REFLECT_101), + borderInterpolate(-5, img.cols, cv::BORDER_WRAP)); +@endcode +Normally, the function is not called directly. It is used inside filtering functions and also in +copyMakeBorder. +@param p 0-based coordinate of the extrapolated pixel along one of the axes, likely \<0 or \>= len +@param len Length of the array along the corresponding axis. +@param borderType Border type, one of the #BorderTypes, except for #BORDER_TRANSPARENT and +#BORDER_ISOLATED . When borderType==#BORDER_CONSTANT , the function always returns -1, regardless +of p and len. + +@sa copyMakeBorder +*/ +CV_EXPORTS_W int borderInterpolate(int p, int len, int borderType); + +/** @example samples/cpp/tutorial_code/ImgTrans/copyMakeBorder_demo.cpp +An example using copyMakeBorder function. +Check @ref tutorial_copyMakeBorder "the corresponding tutorial" for more details +*/ + +/** @brief Forms a border around an image. + +The function copies the source image into the middle of the destination image. The areas to the +left, to the right, above and below the copied source image will be filled with extrapolated +pixels. This is not what filtering functions based on it do (they extrapolate pixels on-fly), but +what other more complex functions, including your own, may do to simplify image boundary handling. + +The function supports the mode when src is already in the middle of dst . In this case, the +function does not copy src itself but simply constructs the border, for example: + +@code{.cpp} + // let border be the same in all directions + int border=2; + // constructs a larger image to fit both the image and the border + Mat gray_buf(rgb.rows + border*2, rgb.cols + border*2, rgb.depth()); + // select the middle part of it w/o copying data + Mat gray(gray_canvas, Rect(border, border, rgb.cols, rgb.rows)); + // convert image from RGB to grayscale + cvtColor(rgb, gray, COLOR_RGB2GRAY); + // form a border in-place + copyMakeBorder(gray, gray_buf, border, border, + border, border, BORDER_REPLICATE); + // now do some custom filtering ... + ... +@endcode +@note When the source image is a part (ROI) of a bigger image, the function will try to use the +pixels outside of the ROI to form a border. To disable this feature and always do extrapolation, as +if src was not a ROI, use borderType | #BORDER_ISOLATED. + +@param src Source image. +@param dst Destination image of the same type as src and the size Size(src.cols+left+right, +src.rows+top+bottom) . +@param top the top pixels +@param bottom the bottom pixels +@param left the left pixels +@param right Parameter specifying how many pixels in each direction from the source image rectangle +to extrapolate. For example, top=1, bottom=1, left=1, right=1 mean that 1 pixel-wide border needs +to be built. +@param borderType Border type. See borderInterpolate for details. +@param value Border value if borderType==BORDER_CONSTANT . + +@sa borderInterpolate +*/ +CV_EXPORTS_W void copyMakeBorder(InputArray src, OutputArray dst, + int top, int bottom, int left, int right, + int borderType, const Scalar& value = Scalar() ); + +/** @brief Calculates the per-element sum of two arrays or an array and a scalar. + +The function add calculates: +- Sum of two arrays when both input arrays have the same size and the same number of channels: +\f[\texttt{dst}(I) = \texttt{saturate} ( \texttt{src1}(I) + \texttt{src2}(I)) \quad \texttt{if mask}(I) \ne0\f] +- Sum of an array and a scalar when src2 is constructed from Scalar or has the same number of +elements as `src1.channels()`: +\f[\texttt{dst}(I) = \texttt{saturate} ( \texttt{src1}(I) + \texttt{src2} ) \quad \texttt{if mask}(I) \ne0\f] +- Sum of a scalar and an array when src1 is constructed from Scalar or has the same number of +elements as `src2.channels()`: +\f[\texttt{dst}(I) = \texttt{saturate} ( \texttt{src1} + \texttt{src2}(I) ) \quad \texttt{if mask}(I) \ne0\f] +where `I` is a multi-dimensional index of array elements. In case of multi-channel arrays, each +channel is processed independently. + +The first function in the list above can be replaced with matrix expressions: +@code{.cpp} + dst = src1 + src2; + dst += src1; // equivalent to add(dst, src1, dst); +@endcode +The input arrays and the output array can all have the same or different depths. For example, you +can add a 16-bit unsigned array to a 8-bit signed array and store the sum as a 32-bit +floating-point array. Depth of the output array is determined by the dtype parameter. In the second +and third cases above, as well as in the first case, when src1.depth() == src2.depth(), dtype can +be set to the default -1. In this case, the output array will have the same depth as the input +array, be it src1, src2 or both. +@note Saturation is not applied when the output array has the depth CV_32S. You may even get +result of an incorrect sign in the case of overflow. +@param src1 first input array or a scalar. +@param src2 second input array or a scalar. +@param dst output array that has the same size and number of channels as the input array(s); the +depth is defined by dtype or src1/src2. +@param mask optional operation mask - 8-bit single channel array, that specifies elements of the +output array to be changed. +@param dtype optional depth of the output array (see the discussion below). +@sa subtract, addWeighted, scaleAdd, Mat::convertTo +*/ +CV_EXPORTS_W void add(InputArray src1, InputArray src2, OutputArray dst, + InputArray mask = noArray(), int dtype = -1); + +/** @brief Calculates the per-element difference between two arrays or array and a scalar. + +The function subtract calculates: +- Difference between two arrays, when both input arrays have the same size and the same number of +channels: + \f[\texttt{dst}(I) = \texttt{saturate} ( \texttt{src1}(I) - \texttt{src2}(I)) \quad \texttt{if mask}(I) \ne0\f] +- Difference between an array and a scalar, when src2 is constructed from Scalar or has the same +number of elements as `src1.channels()`: + \f[\texttt{dst}(I) = \texttt{saturate} ( \texttt{src1}(I) - \texttt{src2} ) \quad \texttt{if mask}(I) \ne0\f] +- Difference between a scalar and an array, when src1 is constructed from Scalar or has the same +number of elements as `src2.channels()`: + \f[\texttt{dst}(I) = \texttt{saturate} ( \texttt{src1} - \texttt{src2}(I) ) \quad \texttt{if mask}(I) \ne0\f] +- The reverse difference between a scalar and an array in the case of `SubRS`: + \f[\texttt{dst}(I) = \texttt{saturate} ( \texttt{src2} - \texttt{src1}(I) ) \quad \texttt{if mask}(I) \ne0\f] +where I is a multi-dimensional index of array elements. In case of multi-channel arrays, each +channel is processed independently. + +The first function in the list above can be replaced with matrix expressions: +@code{.cpp} + dst = src1 - src2; + dst -= src1; // equivalent to subtract(dst, src1, dst); +@endcode +The input arrays and the output array can all have the same or different depths. For example, you +can subtract to 8-bit unsigned arrays and store the difference in a 16-bit signed array. Depth of +the output array is determined by dtype parameter. In the second and third cases above, as well as +in the first case, when src1.depth() == src2.depth(), dtype can be set to the default -1. In this +case the output array will have the same depth as the input array, be it src1, src2 or both. +@note Saturation is not applied when the output array has the depth CV_32S. You may even get +result of an incorrect sign in the case of overflow. +@param src1 first input array or a scalar. +@param src2 second input array or a scalar. +@param dst output array of the same size and the same number of channels as the input array. +@param mask optional operation mask; this is an 8-bit single channel array that specifies elements +of the output array to be changed. +@param dtype optional depth of the output array +@sa add, addWeighted, scaleAdd, Mat::convertTo + */ +CV_EXPORTS_W void subtract(InputArray src1, InputArray src2, OutputArray dst, + InputArray mask = noArray(), int dtype = -1); + + +/** @brief Calculates the per-element scaled product of two arrays. + +The function multiply calculates the per-element product of two arrays: + +\f[\texttt{dst} (I)= \texttt{saturate} ( \texttt{scale} \cdot \texttt{src1} (I) \cdot \texttt{src2} (I))\f] + +There is also a @ref MatrixExpressions -friendly variant of the first function. See Mat::mul . + +For a not-per-element matrix product, see gemm . + +@note Saturation is not applied when the output array has the depth +CV_32S. You may even get result of an incorrect sign in the case of +overflow. +@param src1 first input array. +@param src2 second input array of the same size and the same type as src1. +@param dst output array of the same size and type as src1. +@param scale optional scale factor. +@param dtype optional depth of the output array +@sa add, subtract, divide, scaleAdd, addWeighted, accumulate, accumulateProduct, accumulateSquare, +Mat::convertTo +*/ +CV_EXPORTS_W void multiply(InputArray src1, InputArray src2, + OutputArray dst, double scale = 1, int dtype = -1); + +/** @brief Performs per-element division of two arrays or a scalar by an array. + +The function cv::divide divides one array by another: +\f[\texttt{dst(I) = saturate(src1(I)*scale/src2(I))}\f] +or a scalar by an array when there is no src1 : +\f[\texttt{dst(I) = saturate(scale/src2(I))}\f] + +Different channels of multi-channel arrays are processed independently. + +For integer types when src2(I) is zero, dst(I) will also be zero. + +@note In case of floating point data there is no special defined behavior for zero src2(I) values. +Regular floating-point division is used. +Expect correct IEEE-754 behaviour for floating-point data (with NaN, Inf result values). + +@note Saturation is not applied when the output array has the depth CV_32S. You may even get +result of an incorrect sign in the case of overflow. +@param src1 first input array. +@param src2 second input array of the same size and type as src1. +@param scale scalar factor. +@param dst output array of the same size and type as src2. +@param dtype optional depth of the output array; if -1, dst will have depth src2.depth(), but in +case of an array-by-array division, you can only pass -1 when src1.depth()==src2.depth(). +@sa multiply, add, subtract +*/ +CV_EXPORTS_W void divide(InputArray src1, InputArray src2, OutputArray dst, + double scale = 1, int dtype = -1); + +/** @overload */ +CV_EXPORTS_W void divide(double scale, InputArray src2, + OutputArray dst, int dtype = -1); + +/** @brief Calculates the sum of a scaled array and another array. + +The function scaleAdd is one of the classical primitive linear algebra operations, known as DAXPY +or SAXPY in [BLAS](http://en.wikipedia.org/wiki/Basic_Linear_Algebra_Subprograms). It calculates +the sum of a scaled array and another array: +\f[\texttt{dst} (I)= \texttt{scale} \cdot \texttt{src1} (I) + \texttt{src2} (I)\f] +The function can also be emulated with a matrix expression, for example: +@code{.cpp} + Mat A(3, 3, CV_64F); + ... + A.row(0) = A.row(1)*2 + A.row(2); +@endcode +@param src1 first input array. +@param alpha scale factor for the first array. +@param src2 second input array of the same size and type as src1. +@param dst output array of the same size and type as src1. +@sa add, addWeighted, subtract, Mat::dot, Mat::convertTo +*/ +CV_EXPORTS_W void scaleAdd(InputArray src1, double alpha, InputArray src2, OutputArray dst); + +/** @example samples/cpp/tutorial_code/HighGUI/AddingImagesTrackbar.cpp +Check @ref tutorial_trackbar "the corresponding tutorial" for more details +*/ + +/** @brief Calculates the weighted sum of two arrays. + +The function addWeighted calculates the weighted sum of two arrays as follows: +\f[\texttt{dst} (I)= \texttt{saturate} ( \texttt{src1} (I)* \texttt{alpha} + \texttt{src2} (I)* \texttt{beta} + \texttt{gamma} )\f] +where I is a multi-dimensional index of array elements. In case of multi-channel arrays, each +channel is processed independently. +The function can be replaced with a matrix expression: +@code{.cpp} + dst = src1*alpha + src2*beta + gamma; +@endcode +@note Saturation is not applied when the output array has the depth CV_32S. You may even get +result of an incorrect sign in the case of overflow. +@param src1 first input array. +@param alpha weight of the first array elements. +@param src2 second input array of the same size and channel number as src1. +@param beta weight of the second array elements. +@param gamma scalar added to each sum. +@param dst output array that has the same size and number of channels as the input arrays. +@param dtype optional depth of the output array; when both input arrays have the same depth, dtype +can be set to -1, which will be equivalent to src1.depth(). +@sa add, subtract, scaleAdd, Mat::convertTo +*/ +CV_EXPORTS_W void addWeighted(InputArray src1, double alpha, InputArray src2, + double beta, double gamma, OutputArray dst, int dtype = -1); + +/** @brief Scales, calculates absolute values, and converts the result to 8-bit. + +On each element of the input array, the function convertScaleAbs +performs three operations sequentially: scaling, taking an absolute +value, conversion to an unsigned 8-bit type: +\f[\texttt{dst} (I)= \texttt{saturate\_cast} (| \texttt{src} (I)* \texttt{alpha} + \texttt{beta} |)\f] +In case of multi-channel arrays, the function processes each channel +independently. When the output is not 8-bit, the operation can be +emulated by calling the Mat::convertTo method (or by using matrix +expressions) and then by calculating an absolute value of the result. +For example: +@code{.cpp} + Mat_ A(30,30); + randu(A, Scalar(-100), Scalar(100)); + Mat_ B = A*5 + 3; + B = abs(B); + // Mat_ B = abs(A*5+3) will also do the job, + // but it will allocate a temporary matrix +@endcode +@param src input array. +@param dst output array. +@param alpha optional scale factor. +@param beta optional delta added to the scaled values. +@sa Mat::convertTo, cv::abs(const Mat&) +*/ +CV_EXPORTS_W void convertScaleAbs(InputArray src, OutputArray dst, + double alpha = 1, double beta = 0); + +/** @brief Converts an array to half precision floating number. + +This function converts FP32 (single precision floating point) from/to FP16 (half precision floating point). CV_16S format is used to represent FP16 data. +There are two use modes (src -> dst): CV_32F -> CV_16S and CV_16S -> CV_32F. The input array has to have type of CV_32F or +CV_16S to represent the bit depth. If the input array is neither of them, the function will raise an error. +The format of half precision floating point is defined in IEEE 754-2008. + +@param src input array. +@param dst output array. +*/ +CV_EXPORTS_W void convertFp16(InputArray src, OutputArray dst); + +/** @brief Performs a look-up table transform of an array. + +The function LUT fills the output array with values from the look-up table. Indices of the entries +are taken from the input array. That is, the function processes each element of src as follows: +\f[\texttt{dst} (I) \leftarrow \texttt{lut(src(I) + d)}\f] +where +\f[d = \fork{0}{if \(\texttt{src}\) has depth \(\texttt{CV_8U}\)}{128}{if \(\texttt{src}\) has depth \(\texttt{CV_8S}\)}\f] +@param src input array of 8-bit elements. +@param lut look-up table of 256 elements; in case of multi-channel input array, the table should +either have a single channel (in this case the same table is used for all channels) or the same +number of channels as in the input array. +@param dst output array of the same size and number of channels as src, and the same depth as lut. +@sa convertScaleAbs, Mat::convertTo +*/ +CV_EXPORTS_W void LUT(InputArray src, InputArray lut, OutputArray dst); + +/** @brief Calculates the sum of array elements. + +The function cv::sum calculates and returns the sum of array elements, +independently for each channel. +@param src input array that must have from 1 to 4 channels. +@sa countNonZero, mean, meanStdDev, norm, minMaxLoc, reduce +*/ +CV_EXPORTS_AS(sumElems) Scalar sum(InputArray src); + +/** @brief Counts non-zero array elements. + +The function returns the number of non-zero elements in src : +\f[\sum _{I: \; \texttt{src} (I) \ne0 } 1\f] +@param src single-channel array. +@sa mean, meanStdDev, norm, minMaxLoc, calcCovarMatrix +*/ +CV_EXPORTS_W int countNonZero( InputArray src ); + +/** @brief Returns the list of locations of non-zero pixels + +Given a binary matrix (likely returned from an operation such +as threshold(), compare(), >, ==, etc, return all of +the non-zero indices as a cv::Mat or std::vector (x,y) +For example: +@code{.cpp} + cv::Mat binaryImage; // input, binary image + cv::Mat locations; // output, locations of non-zero pixels + cv::findNonZero(binaryImage, locations); + + // access pixel coordinates + Point pnt = locations.at(i); +@endcode +or +@code{.cpp} + cv::Mat binaryImage; // input, binary image + vector locations; // output, locations of non-zero pixels + cv::findNonZero(binaryImage, locations); + + // access pixel coordinates + Point pnt = locations[i]; +@endcode +@param src single-channel array +@param idx the output array, type of cv::Mat or std::vector, corresponding to non-zero indices in the input +*/ +CV_EXPORTS_W void findNonZero( InputArray src, OutputArray idx ); + +/** @brief Calculates an average (mean) of array elements. + +The function cv::mean calculates the mean value M of array elements, +independently for each channel, and return it: +\f[\begin{array}{l} N = \sum _{I: \; \texttt{mask} (I) \ne 0} 1 \\ M_c = \left ( \sum _{I: \; \texttt{mask} (I) \ne 0}{ \texttt{mtx} (I)_c} \right )/N \end{array}\f] +When all the mask elements are 0's, the function returns Scalar::all(0) +@param src input array that should have from 1 to 4 channels so that the result can be stored in +Scalar_ . +@param mask optional operation mask. +@sa countNonZero, meanStdDev, norm, minMaxLoc +*/ +CV_EXPORTS_W Scalar mean(InputArray src, InputArray mask = noArray()); + +/** Calculates a mean and standard deviation of array elements. + +The function cv::meanStdDev calculates the mean and the standard deviation M +of array elements independently for each channel and returns it via the +output parameters: +\f[\begin{array}{l} N = \sum _{I, \texttt{mask} (I) \ne 0} 1 \\ \texttt{mean} _c = \frac{\sum_{ I: \; \texttt{mask}(I) \ne 0} \texttt{src} (I)_c}{N} \\ \texttt{stddev} _c = \sqrt{\frac{\sum_{ I: \; \texttt{mask}(I) \ne 0} \left ( \texttt{src} (I)_c - \texttt{mean} _c \right )^2}{N}} \end{array}\f] +When all the mask elements are 0's, the function returns +mean=stddev=Scalar::all(0). +@note The calculated standard deviation is only the diagonal of the +complete normalized covariance matrix. If the full matrix is needed, you +can reshape the multi-channel array M x N to the single-channel array +M\*N x mtx.channels() (only possible when the matrix is continuous) and +then pass the matrix to calcCovarMatrix . +@param src input array that should have from 1 to 4 channels so that the results can be stored in +Scalar_ 's. +@param mean output parameter: calculated mean value. +@param stddev output parameter: calculated standard deviation. +@param mask optional operation mask. +@sa countNonZero, mean, norm, minMaxLoc, calcCovarMatrix +*/ +CV_EXPORTS_W void meanStdDev(InputArray src, OutputArray mean, OutputArray stddev, + InputArray mask=noArray()); + +/** @brief Calculates the absolute norm of an array. + +This version of #norm calculates the absolute norm of src1. The type of norm to calculate is specified using #NormTypes. + +As example for one array consider the function \f$r(x)= \begin{pmatrix} x \\ 1-x \end{pmatrix}, x \in [-1;1]\f$. +The \f$ L_{1}, L_{2} \f$ and \f$ L_{\infty} \f$ norm for the sample value \f$r(-1) = \begin{pmatrix} -1 \\ 2 \end{pmatrix}\f$ +is calculated as follows +\f{align*} + \| r(-1) \|_{L_1} &= |-1| + |2| = 3 \\ + \| r(-1) \|_{L_2} &= \sqrt{(-1)^{2} + (2)^{2}} = \sqrt{5} \\ + \| r(-1) \|_{L_\infty} &= \max(|-1|,|2|) = 2 +\f} +and for \f$r(0.5) = \begin{pmatrix} 0.5 \\ 0.5 \end{pmatrix}\f$ the calculation is +\f{align*} + \| r(0.5) \|_{L_1} &= |0.5| + |0.5| = 1 \\ + \| r(0.5) \|_{L_2} &= \sqrt{(0.5)^{2} + (0.5)^{2}} = \sqrt{0.5} \\ + \| r(0.5) \|_{L_\infty} &= \max(|0.5|,|0.5|) = 0.5. +\f} +The following graphic shows all values for the three norm functions \f$\| r(x) \|_{L_1}, \| r(x) \|_{L_2}\f$ and \f$\| r(x) \|_{L_\infty}\f$. +It is notable that the \f$ L_{1} \f$ norm forms the upper and the \f$ L_{\infty} \f$ norm forms the lower border for the example function \f$ r(x) \f$. +![Graphs for the different norm functions from the above example](pics/NormTypes_OneArray_1-2-INF.png) + +When the mask parameter is specified and it is not empty, the norm is + +If normType is not specified, #NORM_L2 is used. +calculated only over the region specified by the mask. + +Multi-channel input arrays are treated as single-channel arrays, that is, +the results for all channels are combined. + +Hamming norms can only be calculated with CV_8U depth arrays. + +@param src1 first input array. +@param normType type of the norm (see #NormTypes). +@param mask optional operation mask; it must have the same size as src1 and CV_8UC1 type. +*/ +CV_EXPORTS_W double norm(InputArray src1, int normType = NORM_L2, InputArray mask = noArray()); + +/** @brief Calculates an absolute difference norm or a relative difference norm. + +This version of cv::norm calculates the absolute difference norm +or the relative difference norm of arrays src1 and src2. +The type of norm to calculate is specified using #NormTypes. + +@param src1 first input array. +@param src2 second input array of the same size and the same type as src1. +@param normType type of the norm (see #NormTypes). +@param mask optional operation mask; it must have the same size as src1 and CV_8UC1 type. +*/ +CV_EXPORTS_W double norm(InputArray src1, InputArray src2, + int normType = NORM_L2, InputArray mask = noArray()); +/** @overload +@param src first input array. +@param normType type of the norm (see #NormTypes). +*/ +CV_EXPORTS double norm( const SparseMat& src, int normType ); + +/** @brief Computes the Peak Signal-to-Noise Ratio (PSNR) image quality metric. + +This function calculates the Peak Signal-to-Noise Ratio (PSNR) image quality metric in decibels (dB), +between two input arrays src1 and src2. The arrays must have the same type. + +The PSNR is calculated as follows: + +\f[ +\texttt{PSNR} = 10 \cdot \log_{10}{\left( \frac{R^2}{MSE} \right) } +\f] + +where R is the maximum integer value of depth (e.g. 255 in the case of CV_8U data) +and MSE is the mean squared error between the two arrays. + +@param src1 first input array. +@param src2 second input array of the same size as src1. +@param R the maximum pixel value (255 by default) + + */ +CV_EXPORTS_W double PSNR(InputArray src1, InputArray src2, double R=255.); + +/** @brief naive nearest neighbor finder + +see http://en.wikipedia.org/wiki/Nearest_neighbor_search +@todo document + */ +CV_EXPORTS_W void batchDistance(InputArray src1, InputArray src2, + OutputArray dist, int dtype, OutputArray nidx, + int normType = NORM_L2, int K = 0, + InputArray mask = noArray(), int update = 0, + bool crosscheck = false); + +/** @brief Normalizes the norm or value range of an array. + +The function cv::normalize normalizes scale and shift the input array elements so that +\f[\| \texttt{dst} \| _{L_p}= \texttt{alpha}\f] +(where p=Inf, 1 or 2) when normType=NORM_INF, NORM_L1, or NORM_L2, respectively; or so that +\f[\min _I \texttt{dst} (I)= \texttt{alpha} , \, \, \max _I \texttt{dst} (I)= \texttt{beta}\f] + +when normType=NORM_MINMAX (for dense arrays only). The optional mask specifies a sub-array to be +normalized. This means that the norm or min-n-max are calculated over the sub-array, and then this +sub-array is modified to be normalized. If you want to only use the mask to calculate the norm or +min-max but modify the whole array, you can use norm and Mat::convertTo. + +In case of sparse matrices, only the non-zero values are analyzed and transformed. Because of this, +the range transformation for sparse matrices is not allowed since it can shift the zero level. + +Possible usage with some positive example data: +@code{.cpp} + vector positiveData = { 2.0, 8.0, 10.0 }; + vector normalizedData_l1, normalizedData_l2, normalizedData_inf, normalizedData_minmax; + + // Norm to probability (total count) + // sum(numbers) = 20.0 + // 2.0 0.1 (2.0/20.0) + // 8.0 0.4 (8.0/20.0) + // 10.0 0.5 (10.0/20.0) + normalize(positiveData, normalizedData_l1, 1.0, 0.0, NORM_L1); + + // Norm to unit vector: ||positiveData|| = 1.0 + // 2.0 0.15 + // 8.0 0.62 + // 10.0 0.77 + normalize(positiveData, normalizedData_l2, 1.0, 0.0, NORM_L2); + + // Norm to max element + // 2.0 0.2 (2.0/10.0) + // 8.0 0.8 (8.0/10.0) + // 10.0 1.0 (10.0/10.0) + normalize(positiveData, normalizedData_inf, 1.0, 0.0, NORM_INF); + + // Norm to range [0.0;1.0] + // 2.0 0.0 (shift to left border) + // 8.0 0.75 (6.0/8.0) + // 10.0 1.0 (shift to right border) + normalize(positiveData, normalizedData_minmax, 1.0, 0.0, NORM_MINMAX); +@endcode + +@param src input array. +@param dst output array of the same size as src . +@param alpha norm value to normalize to or the lower range boundary in case of the range +normalization. +@param beta upper range boundary in case of the range normalization; it is not used for the norm +normalization. +@param norm_type normalization type (see cv::NormTypes). +@param dtype when negative, the output array has the same type as src; otherwise, it has the same +number of channels as src and the depth =CV_MAT_DEPTH(dtype). +@param mask optional operation mask. +@sa norm, Mat::convertTo, SparseMat::convertTo +*/ +CV_EXPORTS_W void normalize( InputArray src, InputOutputArray dst, double alpha = 1, double beta = 0, + int norm_type = NORM_L2, int dtype = -1, InputArray mask = noArray()); + +/** @overload +@param src input array. +@param dst output array of the same size as src . +@param alpha norm value to normalize to or the lower range boundary in case of the range +normalization. +@param normType normalization type (see cv::NormTypes). +*/ +CV_EXPORTS void normalize( const SparseMat& src, SparseMat& dst, double alpha, int normType ); + +/** @brief Finds the global minimum and maximum in an array. + +The function cv::minMaxLoc finds the minimum and maximum element values and their positions. The +extremums are searched across the whole array or, if mask is not an empty array, in the specified +array region. + +The function do not work with multi-channel arrays. If you need to find minimum or maximum +elements across all the channels, use Mat::reshape first to reinterpret the array as +single-channel. Or you may extract the particular channel using either extractImageCOI , or +mixChannels , or split . +@param src input single-channel array. +@param minVal pointer to the returned minimum value; NULL is used if not required. +@param maxVal pointer to the returned maximum value; NULL is used if not required. +@param minLoc pointer to the returned minimum location (in 2D case); NULL is used if not required. +@param maxLoc pointer to the returned maximum location (in 2D case); NULL is used if not required. +@param mask optional mask used to select a sub-array. +@sa max, min, compare, inRange, extractImageCOI, mixChannels, split, Mat::reshape +*/ +CV_EXPORTS_W void minMaxLoc(InputArray src, CV_OUT double* minVal, + CV_OUT double* maxVal = 0, CV_OUT Point* minLoc = 0, + CV_OUT Point* maxLoc = 0, InputArray mask = noArray()); + + +/** @brief Finds the global minimum and maximum in an array + +The function cv::minMaxIdx finds the minimum and maximum element values and their positions. The +extremums are searched across the whole array or, if mask is not an empty array, in the specified +array region. The function does not work with multi-channel arrays. If you need to find minimum or +maximum elements across all the channels, use Mat::reshape first to reinterpret the array as +single-channel. Or you may extract the particular channel using either extractImageCOI , or +mixChannels , or split . In case of a sparse matrix, the minimum is found among non-zero elements +only. +@note When minIdx is not NULL, it must have at least 2 elements (as well as maxIdx), even if src is +a single-row or single-column matrix. In OpenCV (following MATLAB) each array has at least 2 +dimensions, i.e. single-column matrix is Mx1 matrix (and therefore minIdx/maxIdx will be +(i1,0)/(i2,0)) and single-row matrix is 1xN matrix (and therefore minIdx/maxIdx will be +(0,j1)/(0,j2)). +@param src input single-channel array. +@param minVal pointer to the returned minimum value; NULL is used if not required. +@param maxVal pointer to the returned maximum value; NULL is used if not required. +@param minIdx pointer to the returned minimum location (in nD case); NULL is used if not required; +Otherwise, it must point to an array of src.dims elements, the coordinates of the minimum element +in each dimension are stored there sequentially. +@param maxIdx pointer to the returned maximum location (in nD case). NULL is used if not required. +@param mask specified array region +*/ +CV_EXPORTS void minMaxIdx(InputArray src, double* minVal, double* maxVal = 0, + int* minIdx = 0, int* maxIdx = 0, InputArray mask = noArray()); + +/** @overload +@param a input single-channel array. +@param minVal pointer to the returned minimum value; NULL is used if not required. +@param maxVal pointer to the returned maximum value; NULL is used if not required. +@param minIdx pointer to the returned minimum location (in nD case); NULL is used if not required; +Otherwise, it must point to an array of src.dims elements, the coordinates of the minimum element +in each dimension are stored there sequentially. +@param maxIdx pointer to the returned maximum location (in nD case). NULL is used if not required. +*/ +CV_EXPORTS void minMaxLoc(const SparseMat& a, double* minVal, + double* maxVal, int* minIdx = 0, int* maxIdx = 0); + +/** @brief Reduces a matrix to a vector. + +The function #reduce reduces the matrix to a vector by treating the matrix rows/columns as a set of +1D vectors and performing the specified operation on the vectors until a single row/column is +obtained. For example, the function can be used to compute horizontal and vertical projections of a +raster image. In case of #REDUCE_MAX and #REDUCE_MIN , the output image should have the same type as the source one. +In case of #REDUCE_SUM and #REDUCE_AVG , the output may have a larger element bit-depth to preserve accuracy. +And multi-channel arrays are also supported in these two reduction modes. + +The following code demonstrates its usage for a single channel matrix. +@snippet snippets/core_reduce.cpp example + +And the following code demonstrates its usage for a two-channel matrix. +@snippet snippets/core_reduce.cpp example2 + +@param src input 2D matrix. +@param dst output vector. Its size and type is defined by dim and dtype parameters. +@param dim dimension index along which the matrix is reduced. 0 means that the matrix is reduced to +a single row. 1 means that the matrix is reduced to a single column. +@param rtype reduction operation that could be one of #ReduceTypes +@param dtype when negative, the output vector will have the same type as the input matrix, +otherwise, its type will be CV_MAKE_TYPE(CV_MAT_DEPTH(dtype), src.channels()). +@sa repeat +*/ +CV_EXPORTS_W void reduce(InputArray src, OutputArray dst, int dim, int rtype, int dtype = -1); + +/** @brief Creates one multi-channel array out of several single-channel ones. + +The function cv::merge merges several arrays to make a single multi-channel array. That is, each +element of the output array will be a concatenation of the elements of the input arrays, where +elements of i-th input array are treated as mv[i].channels()-element vectors. + +The function cv::split does the reverse operation. If you need to shuffle channels in some other +advanced way, use cv::mixChannels. + +The following example shows how to merge 3 single channel matrices into a single 3-channel matrix. +@snippet snippets/core_merge.cpp example + +@param mv input array of matrices to be merged; all the matrices in mv must have the same +size and the same depth. +@param count number of input matrices when mv is a plain C array; it must be greater than zero. +@param dst output array of the same size and the same depth as mv[0]; The number of channels will +be equal to the parameter count. +@sa mixChannels, split, Mat::reshape +*/ +CV_EXPORTS void merge(const Mat* mv, size_t count, OutputArray dst); + +/** @overload +@param mv input vector of matrices to be merged; all the matrices in mv must have the same +size and the same depth. +@param dst output array of the same size and the same depth as mv[0]; The number of channels will +be the total number of channels in the matrix array. + */ +CV_EXPORTS_W void merge(InputArrayOfArrays mv, OutputArray dst); + +/** @brief Divides a multi-channel array into several single-channel arrays. + +The function cv::split splits a multi-channel array into separate single-channel arrays: +\f[\texttt{mv} [c](I) = \texttt{src} (I)_c\f] +If you need to extract a single channel or do some other sophisticated channel permutation, use +mixChannels . + +The following example demonstrates how to split a 3-channel matrix into 3 single channel matrices. +@snippet snippets/core_split.cpp example + +@param src input multi-channel array. +@param mvbegin output array; the number of arrays must match src.channels(); the arrays themselves are +reallocated, if needed. +@sa merge, mixChannels, cvtColor +*/ +CV_EXPORTS void split(const Mat& src, Mat* mvbegin); + +/** @overload +@param m input multi-channel array. +@param mv output vector of arrays; the arrays themselves are reallocated, if needed. +*/ +CV_EXPORTS_W void split(InputArray m, OutputArrayOfArrays mv); + +/** @brief Copies specified channels from input arrays to the specified channels of +output arrays. + +The function cv::mixChannels provides an advanced mechanism for shuffling image channels. + +cv::split,cv::merge,cv::extractChannel,cv::insertChannel and some forms of cv::cvtColor are partial cases of cv::mixChannels. + +In the example below, the code splits a 4-channel BGRA image into a 3-channel BGR (with B and R +channels swapped) and a separate alpha-channel image: +@code{.cpp} + Mat bgra( 100, 100, CV_8UC4, Scalar(255,0,0,255) ); + Mat bgr( bgra.rows, bgra.cols, CV_8UC3 ); + Mat alpha( bgra.rows, bgra.cols, CV_8UC1 ); + + // forming an array of matrices is a quite efficient operation, + // because the matrix data is not copied, only the headers + Mat out[] = { bgr, alpha }; + // bgra[0] -> bgr[2], bgra[1] -> bgr[1], + // bgra[2] -> bgr[0], bgra[3] -> alpha[0] + int from_to[] = { 0,2, 1,1, 2,0, 3,3 }; + mixChannels( &bgra, 1, out, 2, from_to, 4 ); +@endcode +@note Unlike many other new-style C++ functions in OpenCV (see the introduction section and +Mat::create ), cv::mixChannels requires the output arrays to be pre-allocated before calling the +function. +@param src input array or vector of matrices; all of the matrices must have the same size and the +same depth. +@param nsrcs number of matrices in `src`. +@param dst output array or vector of matrices; all the matrices **must be allocated**; their size and +depth must be the same as in `src[0]`. +@param ndsts number of matrices in `dst`. +@param fromTo array of index pairs specifying which channels are copied and where; fromTo[k\*2] is +a 0-based index of the input channel in src, fromTo[k\*2+1] is an index of the output channel in +dst; the continuous channel numbering is used: the first input image channels are indexed from 0 to +src[0].channels()-1, the second input image channels are indexed from src[0].channels() to +src[0].channels() + src[1].channels()-1, and so on, the same scheme is used for the output image +channels; as a special case, when fromTo[k\*2] is negative, the corresponding output channel is +filled with zero . +@param npairs number of index pairs in `fromTo`. +@sa split, merge, extractChannel, insertChannel, cvtColor +*/ +CV_EXPORTS void mixChannels(const Mat* src, size_t nsrcs, Mat* dst, size_t ndsts, + const int* fromTo, size_t npairs); + +/** @overload +@param src input array or vector of matrices; all of the matrices must have the same size and the +same depth. +@param dst output array or vector of matrices; all the matrices **must be allocated**; their size and +depth must be the same as in src[0]. +@param fromTo array of index pairs specifying which channels are copied and where; fromTo[k\*2] is +a 0-based index of the input channel in src, fromTo[k\*2+1] is an index of the output channel in +dst; the continuous channel numbering is used: the first input image channels are indexed from 0 to +src[0].channels()-1, the second input image channels are indexed from src[0].channels() to +src[0].channels() + src[1].channels()-1, and so on, the same scheme is used for the output image +channels; as a special case, when fromTo[k\*2] is negative, the corresponding output channel is +filled with zero . +@param npairs number of index pairs in fromTo. +*/ +CV_EXPORTS void mixChannels(InputArrayOfArrays src, InputOutputArrayOfArrays dst, + const int* fromTo, size_t npairs); + +/** @overload +@param src input array or vector of matrices; all of the matrices must have the same size and the +same depth. +@param dst output array or vector of matrices; all the matrices **must be allocated**; their size and +depth must be the same as in src[0]. +@param fromTo array of index pairs specifying which channels are copied and where; fromTo[k\*2] is +a 0-based index of the input channel in src, fromTo[k\*2+1] is an index of the output channel in +dst; the continuous channel numbering is used: the first input image channels are indexed from 0 to +src[0].channels()-1, the second input image channels are indexed from src[0].channels() to +src[0].channels() + src[1].channels()-1, and so on, the same scheme is used for the output image +channels; as a special case, when fromTo[k\*2] is negative, the corresponding output channel is +filled with zero . +*/ +CV_EXPORTS_W void mixChannels(InputArrayOfArrays src, InputOutputArrayOfArrays dst, + const std::vector& fromTo); + +/** @brief Extracts a single channel from src (coi is 0-based index) +@param src input array +@param dst output array +@param coi index of channel to extract +@sa mixChannels, split +*/ +CV_EXPORTS_W void extractChannel(InputArray src, OutputArray dst, int coi); + +/** @brief Inserts a single channel to dst (coi is 0-based index) +@param src input array +@param dst output array +@param coi index of channel for insertion +@sa mixChannels, merge +*/ +CV_EXPORTS_W void insertChannel(InputArray src, InputOutputArray dst, int coi); + +/** @brief Flips a 2D array around vertical, horizontal, or both axes. + +The function cv::flip flips the array in one of three different ways (row +and column indices are 0-based): +\f[\texttt{dst} _{ij} = +\left\{ +\begin{array}{l l} +\texttt{src} _{\texttt{src.rows}-i-1,j} & if\; \texttt{flipCode} = 0 \\ +\texttt{src} _{i, \texttt{src.cols} -j-1} & if\; \texttt{flipCode} > 0 \\ +\texttt{src} _{ \texttt{src.rows} -i-1, \texttt{src.cols} -j-1} & if\; \texttt{flipCode} < 0 \\ +\end{array} +\right.\f] +The example scenarios of using the function are the following: +* Vertical flipping of the image (flipCode == 0) to switch between + top-left and bottom-left image origin. This is a typical operation + in video processing on Microsoft Windows\* OS. +* Horizontal flipping of the image with the subsequent horizontal + shift and absolute difference calculation to check for a + vertical-axis symmetry (flipCode \> 0). +* Simultaneous horizontal and vertical flipping of the image with + the subsequent shift and absolute difference calculation to check + for a central symmetry (flipCode \< 0). +* Reversing the order of point arrays (flipCode \> 0 or + flipCode == 0). +@param src input array. +@param dst output array of the same size and type as src. +@param flipCode a flag to specify how to flip the array; 0 means +flipping around the x-axis and positive value (for example, 1) means +flipping around y-axis. Negative value (for example, -1) means flipping +around both axes. +@sa transpose , repeat , completeSymm +*/ +CV_EXPORTS_W void flip(InputArray src, OutputArray dst, int flipCode); + +enum RotateFlags { + ROTATE_90_CLOCKWISE = 0, //! A = (cv::Mat_(3, 2) << 1, 4, + 2, 5, + 3, 6); + cv::Mat_ B = (cv::Mat_(3, 2) << 7, 10, + 8, 11, + 9, 12); + + cv::Mat C; + cv::hconcat(A, B, C); + //C: + //[1, 4, 7, 10; + // 2, 5, 8, 11; + // 3, 6, 9, 12] + @endcode + @param src1 first input array to be considered for horizontal concatenation. + @param src2 second input array to be considered for horizontal concatenation. + @param dst output array. It has the same number of rows and depth as the src1 and src2, and the sum of cols of the src1 and src2. + */ +CV_EXPORTS void hconcat(InputArray src1, InputArray src2, OutputArray dst); +/** @overload + @code{.cpp} + std::vector matrices = { cv::Mat(4, 1, CV_8UC1, cv::Scalar(1)), + cv::Mat(4, 1, CV_8UC1, cv::Scalar(2)), + cv::Mat(4, 1, CV_8UC1, cv::Scalar(3)),}; + + cv::Mat out; + cv::hconcat( matrices, out ); + //out: + //[1, 2, 3; + // 1, 2, 3; + // 1, 2, 3; + // 1, 2, 3] + @endcode + @param src input array or vector of matrices. all of the matrices must have the same number of rows and the same depth. + @param dst output array. It has the same number of rows and depth as the src, and the sum of cols of the src. +same depth. + */ +CV_EXPORTS_W void hconcat(InputArrayOfArrays src, OutputArray dst); + +/** @brief Applies vertical concatenation to given matrices. + +The function vertically concatenates two or more cv::Mat matrices (with the same number of cols). +@code{.cpp} + cv::Mat matArray[] = { cv::Mat(1, 4, CV_8UC1, cv::Scalar(1)), + cv::Mat(1, 4, CV_8UC1, cv::Scalar(2)), + cv::Mat(1, 4, CV_8UC1, cv::Scalar(3)),}; + + cv::Mat out; + cv::vconcat( matArray, 3, out ); + //out: + //[1, 1, 1, 1; + // 2, 2, 2, 2; + // 3, 3, 3, 3] +@endcode +@param src input array or vector of matrices. all of the matrices must have the same number of cols and the same depth. +@param nsrc number of matrices in src. +@param dst output array. It has the same number of cols and depth as the src, and the sum of rows of the src. +@sa cv::hconcat(const Mat*, size_t, OutputArray), @sa cv::hconcat(InputArrayOfArrays, OutputArray) and @sa cv::hconcat(InputArray, InputArray, OutputArray) +*/ +CV_EXPORTS void vconcat(const Mat* src, size_t nsrc, OutputArray dst); +/** @overload + @code{.cpp} + cv::Mat_ A = (cv::Mat_(3, 2) << 1, 7, + 2, 8, + 3, 9); + cv::Mat_ B = (cv::Mat_(3, 2) << 4, 10, + 5, 11, + 6, 12); + + cv::Mat C; + cv::vconcat(A, B, C); + //C: + //[1, 7; + // 2, 8; + // 3, 9; + // 4, 10; + // 5, 11; + // 6, 12] + @endcode + @param src1 first input array to be considered for vertical concatenation. + @param src2 second input array to be considered for vertical concatenation. + @param dst output array. It has the same number of cols and depth as the src1 and src2, and the sum of rows of the src1 and src2. + */ +CV_EXPORTS void vconcat(InputArray src1, InputArray src2, OutputArray dst); +/** @overload + @code{.cpp} + std::vector matrices = { cv::Mat(1, 4, CV_8UC1, cv::Scalar(1)), + cv::Mat(1, 4, CV_8UC1, cv::Scalar(2)), + cv::Mat(1, 4, CV_8UC1, cv::Scalar(3)),}; + + cv::Mat out; + cv::vconcat( matrices, out ); + //out: + //[1, 1, 1, 1; + // 2, 2, 2, 2; + // 3, 3, 3, 3] + @endcode + @param src input array or vector of matrices. all of the matrices must have the same number of cols and the same depth + @param dst output array. It has the same number of cols and depth as the src, and the sum of rows of the src. +same depth. + */ +CV_EXPORTS_W void vconcat(InputArrayOfArrays src, OutputArray dst); + +/** @brief computes bitwise conjunction of the two arrays (dst = src1 & src2) +Calculates the per-element bit-wise conjunction of two arrays or an +array and a scalar. + +The function cv::bitwise_and calculates the per-element bit-wise logical conjunction for: +* Two arrays when src1 and src2 have the same size: + \f[\texttt{dst} (I) = \texttt{src1} (I) \wedge \texttt{src2} (I) \quad \texttt{if mask} (I) \ne0\f] +* An array and a scalar when src2 is constructed from Scalar or has + the same number of elements as `src1.channels()`: + \f[\texttt{dst} (I) = \texttt{src1} (I) \wedge \texttt{src2} \quad \texttt{if mask} (I) \ne0\f] +* A scalar and an array when src1 is constructed from Scalar or has + the same number of elements as `src2.channels()`: + \f[\texttt{dst} (I) = \texttt{src1} \wedge \texttt{src2} (I) \quad \texttt{if mask} (I) \ne0\f] +In case of floating-point arrays, their machine-specific bit +representations (usually IEEE754-compliant) are used for the operation. +In case of multi-channel arrays, each channel is processed +independently. In the second and third cases above, the scalar is first +converted to the array type. +@param src1 first input array or a scalar. +@param src2 second input array or a scalar. +@param dst output array that has the same size and type as the input +arrays. +@param mask optional operation mask, 8-bit single channel array, that +specifies elements of the output array to be changed. +*/ +CV_EXPORTS_W void bitwise_and(InputArray src1, InputArray src2, + OutputArray dst, InputArray mask = noArray()); + +/** @brief Calculates the per-element bit-wise disjunction of two arrays or an +array and a scalar. + +The function cv::bitwise_or calculates the per-element bit-wise logical disjunction for: +* Two arrays when src1 and src2 have the same size: + \f[\texttt{dst} (I) = \texttt{src1} (I) \vee \texttt{src2} (I) \quad \texttt{if mask} (I) \ne0\f] +* An array and a scalar when src2 is constructed from Scalar or has + the same number of elements as `src1.channels()`: + \f[\texttt{dst} (I) = \texttt{src1} (I) \vee \texttt{src2} \quad \texttt{if mask} (I) \ne0\f] +* A scalar and an array when src1 is constructed from Scalar or has + the same number of elements as `src2.channels()`: + \f[\texttt{dst} (I) = \texttt{src1} \vee \texttt{src2} (I) \quad \texttt{if mask} (I) \ne0\f] +In case of floating-point arrays, their machine-specific bit +representations (usually IEEE754-compliant) are used for the operation. +In case of multi-channel arrays, each channel is processed +independently. In the second and third cases above, the scalar is first +converted to the array type. +@param src1 first input array or a scalar. +@param src2 second input array or a scalar. +@param dst output array that has the same size and type as the input +arrays. +@param mask optional operation mask, 8-bit single channel array, that +specifies elements of the output array to be changed. +*/ +CV_EXPORTS_W void bitwise_or(InputArray src1, InputArray src2, + OutputArray dst, InputArray mask = noArray()); + +/** @brief Calculates the per-element bit-wise "exclusive or" operation on two +arrays or an array and a scalar. + +The function cv::bitwise_xor calculates the per-element bit-wise logical "exclusive-or" +operation for: +* Two arrays when src1 and src2 have the same size: + \f[\texttt{dst} (I) = \texttt{src1} (I) \oplus \texttt{src2} (I) \quad \texttt{if mask} (I) \ne0\f] +* An array and a scalar when src2 is constructed from Scalar or has + the same number of elements as `src1.channels()`: + \f[\texttt{dst} (I) = \texttt{src1} (I) \oplus \texttt{src2} \quad \texttt{if mask} (I) \ne0\f] +* A scalar and an array when src1 is constructed from Scalar or has + the same number of elements as `src2.channels()`: + \f[\texttt{dst} (I) = \texttt{src1} \oplus \texttt{src2} (I) \quad \texttt{if mask} (I) \ne0\f] +In case of floating-point arrays, their machine-specific bit +representations (usually IEEE754-compliant) are used for the operation. +In case of multi-channel arrays, each channel is processed +independently. In the 2nd and 3rd cases above, the scalar is first +converted to the array type. +@param src1 first input array or a scalar. +@param src2 second input array or a scalar. +@param dst output array that has the same size and type as the input +arrays. +@param mask optional operation mask, 8-bit single channel array, that +specifies elements of the output array to be changed. +*/ +CV_EXPORTS_W void bitwise_xor(InputArray src1, InputArray src2, + OutputArray dst, InputArray mask = noArray()); + +/** @brief Inverts every bit of an array. + +The function cv::bitwise_not calculates per-element bit-wise inversion of the input +array: +\f[\texttt{dst} (I) = \neg \texttt{src} (I)\f] +In case of a floating-point input array, its machine-specific bit +representation (usually IEEE754-compliant) is used for the operation. In +case of multi-channel arrays, each channel is processed independently. +@param src input array. +@param dst output array that has the same size and type as the input +array. +@param mask optional operation mask, 8-bit single channel array, that +specifies elements of the output array to be changed. +*/ +CV_EXPORTS_W void bitwise_not(InputArray src, OutputArray dst, + InputArray mask = noArray()); + +/** @brief Calculates the per-element absolute difference between two arrays or between an array and a scalar. + +The function cv::absdiff calculates: +* Absolute difference between two arrays when they have the same + size and type: + \f[\texttt{dst}(I) = \texttt{saturate} (| \texttt{src1}(I) - \texttt{src2}(I)|)\f] +* Absolute difference between an array and a scalar when the second + array is constructed from Scalar or has as many elements as the + number of channels in `src1`: + \f[\texttt{dst}(I) = \texttt{saturate} (| \texttt{src1}(I) - \texttt{src2} |)\f] +* Absolute difference between a scalar and an array when the first + array is constructed from Scalar or has as many elements as the + number of channels in `src2`: + \f[\texttt{dst}(I) = \texttt{saturate} (| \texttt{src1} - \texttt{src2}(I) |)\f] + where I is a multi-dimensional index of array elements. In case of + multi-channel arrays, each channel is processed independently. +@note Saturation is not applied when the arrays have the depth CV_32S. +You may even get a negative value in the case of overflow. +@param src1 first input array or a scalar. +@param src2 second input array or a scalar. +@param dst output array that has the same size and type as input arrays. +@sa cv::abs(const Mat&) +*/ +CV_EXPORTS_W void absdiff(InputArray src1, InputArray src2, OutputArray dst); + +/** @brief This is an overloaded member function, provided for convenience (python) +Copies the matrix to another one. +When the operation mask is specified, if the Mat::create call shown above reallocates the matrix, the newly allocated matrix is initialized with all zeros before copying the data. +@param src source matrix. +@param dst Destination matrix. If it does not have a proper size or type before the operation, it is +reallocated. +@param mask Operation mask of the same size as \*this. Its non-zero elements indicate which matrix +elements need to be copied. The mask has to be of type CV_8U and can have 1 or multiple channels. +*/ + +void CV_EXPORTS_W copyTo(InputArray src, OutputArray dst, InputArray mask); +/** @brief Checks if array elements lie between the elements of two other arrays. + +The function checks the range as follows: +- For every element of a single-channel input array: + \f[\texttt{dst} (I)= \texttt{lowerb} (I)_0 \leq \texttt{src} (I)_0 \leq \texttt{upperb} (I)_0\f] +- For two-channel arrays: + \f[\texttt{dst} (I)= \texttt{lowerb} (I)_0 \leq \texttt{src} (I)_0 \leq \texttt{upperb} (I)_0 \land \texttt{lowerb} (I)_1 \leq \texttt{src} (I)_1 \leq \texttt{upperb} (I)_1\f] +- and so forth. + +That is, dst (I) is set to 255 (all 1 -bits) if src (I) is within the +specified 1D, 2D, 3D, ... box and 0 otherwise. + +When the lower and/or upper boundary parameters are scalars, the indexes +(I) at lowerb and upperb in the above formulas should be omitted. +@param src first input array. +@param lowerb inclusive lower boundary array or a scalar. +@param upperb inclusive upper boundary array or a scalar. +@param dst output array of the same size as src and CV_8U type. +*/ +CV_EXPORTS_W void inRange(InputArray src, InputArray lowerb, + InputArray upperb, OutputArray dst); + +/** @brief Performs the per-element comparison of two arrays or an array and scalar value. + +The function compares: +* Elements of two arrays when src1 and src2 have the same size: + \f[\texttt{dst} (I) = \texttt{src1} (I) \,\texttt{cmpop}\, \texttt{src2} (I)\f] +* Elements of src1 with a scalar src2 when src2 is constructed from + Scalar or has a single element: + \f[\texttt{dst} (I) = \texttt{src1}(I) \,\texttt{cmpop}\, \texttt{src2}\f] +* src1 with elements of src2 when src1 is constructed from Scalar or + has a single element: + \f[\texttt{dst} (I) = \texttt{src1} \,\texttt{cmpop}\, \texttt{src2} (I)\f] +When the comparison result is true, the corresponding element of output +array is set to 255. The comparison operations can be replaced with the +equivalent matrix expressions: +@code{.cpp} + Mat dst1 = src1 >= src2; + Mat dst2 = src1 < 8; + ... +@endcode +@param src1 first input array or a scalar; when it is an array, it must have a single channel. +@param src2 second input array or a scalar; when it is an array, it must have a single channel. +@param dst output array of type ref CV_8U that has the same size and the same number of channels as + the input arrays. +@param cmpop a flag, that specifies correspondence between the arrays (cv::CmpTypes) +@sa checkRange, min, max, threshold +*/ +CV_EXPORTS_W void compare(InputArray src1, InputArray src2, OutputArray dst, int cmpop); + +/** @brief Calculates per-element minimum of two arrays or an array and a scalar. + +The function cv::min calculates the per-element minimum of two arrays: +\f[\texttt{dst} (I)= \min ( \texttt{src1} (I), \texttt{src2} (I))\f] +or array and a scalar: +\f[\texttt{dst} (I)= \min ( \texttt{src1} (I), \texttt{value} )\f] +@param src1 first input array. +@param src2 second input array of the same size and type as src1. +@param dst output array of the same size and type as src1. +@sa max, compare, inRange, minMaxLoc +*/ +CV_EXPORTS_W void min(InputArray src1, InputArray src2, OutputArray dst); +/** @overload +needed to avoid conflicts with const _Tp& std::min(const _Tp&, const _Tp&, _Compare) +*/ +CV_EXPORTS void min(const Mat& src1, const Mat& src2, Mat& dst); +/** @overload +needed to avoid conflicts with const _Tp& std::min(const _Tp&, const _Tp&, _Compare) +*/ +CV_EXPORTS void min(const UMat& src1, const UMat& src2, UMat& dst); + +/** @brief Calculates per-element maximum of two arrays or an array and a scalar. + +The function cv::max calculates the per-element maximum of two arrays: +\f[\texttt{dst} (I)= \max ( \texttt{src1} (I), \texttt{src2} (I))\f] +or array and a scalar: +\f[\texttt{dst} (I)= \max ( \texttt{src1} (I), \texttt{value} )\f] +@param src1 first input array. +@param src2 second input array of the same size and type as src1 . +@param dst output array of the same size and type as src1. +@sa min, compare, inRange, minMaxLoc, @ref MatrixExpressions +*/ +CV_EXPORTS_W void max(InputArray src1, InputArray src2, OutputArray dst); +/** @overload +needed to avoid conflicts with const _Tp& std::min(const _Tp&, const _Tp&, _Compare) +*/ +CV_EXPORTS void max(const Mat& src1, const Mat& src2, Mat& dst); +/** @overload +needed to avoid conflicts with const _Tp& std::min(const _Tp&, const _Tp&, _Compare) +*/ +CV_EXPORTS void max(const UMat& src1, const UMat& src2, UMat& dst); + +/** @brief Calculates a square root of array elements. + +The function cv::sqrt calculates a square root of each input array element. +In case of multi-channel arrays, each channel is processed +independently. The accuracy is approximately the same as of the built-in +std::sqrt . +@param src input floating-point array. +@param dst output array of the same size and type as src. +*/ +CV_EXPORTS_W void sqrt(InputArray src, OutputArray dst); + +/** @brief Raises every array element to a power. + +The function cv::pow raises every element of the input array to power : +\f[\texttt{dst} (I) = \fork{\texttt{src}(I)^{power}}{if \(\texttt{power}\) is integer}{|\texttt{src}(I)|^{power}}{otherwise}\f] + +So, for a non-integer power exponent, the absolute values of input array +elements are used. However, it is possible to get true values for +negative values using some extra operations. In the example below, +computing the 5th root of array src shows: +@code{.cpp} + Mat mask = src < 0; + pow(src, 1./5, dst); + subtract(Scalar::all(0), dst, dst, mask); +@endcode +For some values of power, such as integer values, 0.5 and -0.5, +specialized faster algorithms are used. + +Special values (NaN, Inf) are not handled. +@param src input array. +@param power exponent of power. +@param dst output array of the same size and type as src. +@sa sqrt, exp, log, cartToPolar, polarToCart +*/ +CV_EXPORTS_W void pow(InputArray src, double power, OutputArray dst); + +/** @brief Calculates the exponent of every array element. + +The function cv::exp calculates the exponent of every element of the input +array: +\f[\texttt{dst} [I] = e^{ src(I) }\f] + +The maximum relative error is about 7e-6 for single-precision input and +less than 1e-10 for double-precision input. Currently, the function +converts denormalized values to zeros on output. Special values (NaN, +Inf) are not handled. +@param src input array. +@param dst output array of the same size and type as src. +@sa log , cartToPolar , polarToCart , phase , pow , sqrt , magnitude +*/ +CV_EXPORTS_W void exp(InputArray src, OutputArray dst); + +/** @brief Calculates the natural logarithm of every array element. + +The function cv::log calculates the natural logarithm of every element of the input array: +\f[\texttt{dst} (I) = \log (\texttt{src}(I)) \f] + +Output on zero, negative and special (NaN, Inf) values is undefined. + +@param src input array. +@param dst output array of the same size and type as src . +@sa exp, cartToPolar, polarToCart, phase, pow, sqrt, magnitude +*/ +CV_EXPORTS_W void log(InputArray src, OutputArray dst); + +/** @brief Calculates x and y coordinates of 2D vectors from their magnitude and angle. + +The function cv::polarToCart calculates the Cartesian coordinates of each 2D +vector represented by the corresponding elements of magnitude and angle: +\f[\begin{array}{l} \texttt{x} (I) = \texttt{magnitude} (I) \cos ( \texttt{angle} (I)) \\ \texttt{y} (I) = \texttt{magnitude} (I) \sin ( \texttt{angle} (I)) \\ \end{array}\f] + +The relative accuracy of the estimated coordinates is about 1e-6. +@param magnitude input floating-point array of magnitudes of 2D vectors; +it can be an empty matrix (=Mat()), in this case, the function assumes +that all the magnitudes are =1; if it is not empty, it must have the +same size and type as angle. +@param angle input floating-point array of angles of 2D vectors. +@param x output array of x-coordinates of 2D vectors; it has the same +size and type as angle. +@param y output array of y-coordinates of 2D vectors; it has the same +size and type as angle. +@param angleInDegrees when true, the input angles are measured in +degrees, otherwise, they are measured in radians. +@sa cartToPolar, magnitude, phase, exp, log, pow, sqrt +*/ +CV_EXPORTS_W void polarToCart(InputArray magnitude, InputArray angle, + OutputArray x, OutputArray y, bool angleInDegrees = false); + +/** @brief Calculates the magnitude and angle of 2D vectors. + +The function cv::cartToPolar calculates either the magnitude, angle, or both +for every 2D vector (x(I),y(I)): +\f[\begin{array}{l} \texttt{magnitude} (I)= \sqrt{\texttt{x}(I)^2+\texttt{y}(I)^2} , \\ \texttt{angle} (I)= \texttt{atan2} ( \texttt{y} (I), \texttt{x} (I))[ \cdot180 / \pi ] \end{array}\f] + +The angles are calculated with accuracy about 0.3 degrees. For the point +(0,0), the angle is set to 0. +@param x array of x-coordinates; this must be a single-precision or +double-precision floating-point array. +@param y array of y-coordinates, that must have the same size and same type as x. +@param magnitude output array of magnitudes of the same size and type as x. +@param angle output array of angles that has the same size and type as +x; the angles are measured in radians (from 0 to 2\*Pi) or in degrees (0 to 360 degrees). +@param angleInDegrees a flag, indicating whether the angles are measured +in radians (which is by default), or in degrees. +@sa Sobel, Scharr +*/ +CV_EXPORTS_W void cartToPolar(InputArray x, InputArray y, + OutputArray magnitude, OutputArray angle, + bool angleInDegrees = false); + +/** @brief Calculates the rotation angle of 2D vectors. + +The function cv::phase calculates the rotation angle of each 2D vector that +is formed from the corresponding elements of x and y : +\f[\texttt{angle} (I) = \texttt{atan2} ( \texttt{y} (I), \texttt{x} (I))\f] + +The angle estimation accuracy is about 0.3 degrees. When x(I)=y(I)=0 , +the corresponding angle(I) is set to 0. +@param x input floating-point array of x-coordinates of 2D vectors. +@param y input array of y-coordinates of 2D vectors; it must have the +same size and the same type as x. +@param angle output array of vector angles; it has the same size and +same type as x . +@param angleInDegrees when true, the function calculates the angle in +degrees, otherwise, they are measured in radians. +*/ +CV_EXPORTS_W void phase(InputArray x, InputArray y, OutputArray angle, + bool angleInDegrees = false); + +/** @brief Calculates the magnitude of 2D vectors. + +The function cv::magnitude calculates the magnitude of 2D vectors formed +from the corresponding elements of x and y arrays: +\f[\texttt{dst} (I) = \sqrt{\texttt{x}(I)^2 + \texttt{y}(I)^2}\f] +@param x floating-point array of x-coordinates of the vectors. +@param y floating-point array of y-coordinates of the vectors; it must +have the same size as x. +@param magnitude output array of the same size and type as x. +@sa cartToPolar, polarToCart, phase, sqrt +*/ +CV_EXPORTS_W void magnitude(InputArray x, InputArray y, OutputArray magnitude); + +/** @brief Checks every element of an input array for invalid values. + +The function cv::checkRange checks that every array element is neither NaN nor infinite. When minVal \> +-DBL_MAX and maxVal \< DBL_MAX, the function also checks that each value is between minVal and +maxVal. In case of multi-channel arrays, each channel is processed independently. If some values +are out of range, position of the first outlier is stored in pos (when pos != NULL). Then, the +function either returns false (when quiet=true) or throws an exception. +@param a input array. +@param quiet a flag, indicating whether the functions quietly return false when the array elements +are out of range or they throw an exception. +@param pos optional output parameter, when not NULL, must be a pointer to array of src.dims +elements. +@param minVal inclusive lower boundary of valid values range. +@param maxVal exclusive upper boundary of valid values range. +*/ +CV_EXPORTS_W bool checkRange(InputArray a, bool quiet = true, CV_OUT Point* pos = 0, + double minVal = -DBL_MAX, double maxVal = DBL_MAX); + +/** @brief converts NaNs to the given number +@param a input/output matrix (CV_32F type). +@param val value to convert the NaNs +*/ +CV_EXPORTS_W void patchNaNs(InputOutputArray a, double val = 0); + +/** @brief Performs generalized matrix multiplication. + +The function cv::gemm performs generalized matrix multiplication similar to the +gemm functions in BLAS level 3. For example, +`gemm(src1, src2, alpha, src3, beta, dst, GEMM_1_T + GEMM_3_T)` +corresponds to +\f[\texttt{dst} = \texttt{alpha} \cdot \texttt{src1} ^T \cdot \texttt{src2} + \texttt{beta} \cdot \texttt{src3} ^T\f] + +In case of complex (two-channel) data, performed a complex matrix +multiplication. + +The function can be replaced with a matrix expression. For example, the +above call can be replaced with: +@code{.cpp} + dst = alpha*src1.t()*src2 + beta*src3.t(); +@endcode +@param src1 first multiplied input matrix that could be real(CV_32FC1, +CV_64FC1) or complex(CV_32FC2, CV_64FC2). +@param src2 second multiplied input matrix of the same type as src1. +@param alpha weight of the matrix product. +@param src3 third optional delta matrix added to the matrix product; it +should have the same type as src1 and src2. +@param beta weight of src3. +@param dst output matrix; it has the proper size and the same type as +input matrices. +@param flags operation flags (cv::GemmFlags) +@sa mulTransposed , transform +*/ +CV_EXPORTS_W void gemm(InputArray src1, InputArray src2, double alpha, + InputArray src3, double beta, OutputArray dst, int flags = 0); + +/** @brief Calculates the product of a matrix and its transposition. + +The function cv::mulTransposed calculates the product of src and its +transposition: +\f[\texttt{dst} = \texttt{scale} ( \texttt{src} - \texttt{delta} )^T ( \texttt{src} - \texttt{delta} )\f] +if aTa=true , and +\f[\texttt{dst} = \texttt{scale} ( \texttt{src} - \texttt{delta} ) ( \texttt{src} - \texttt{delta} )^T\f] +otherwise. The function is used to calculate the covariance matrix. With +zero delta, it can be used as a faster substitute for general matrix +product A\*B when B=A' +@param src input single-channel matrix. Note that unlike gemm, the +function can multiply not only floating-point matrices. +@param dst output square matrix. +@param aTa Flag specifying the multiplication ordering. See the +description below. +@param delta Optional delta matrix subtracted from src before the +multiplication. When the matrix is empty ( delta=noArray() ), it is +assumed to be zero, that is, nothing is subtracted. If it has the same +size as src , it is simply subtracted. Otherwise, it is "repeated" (see +repeat ) to cover the full src and then subtracted. Type of the delta +matrix, when it is not empty, must be the same as the type of created +output matrix. See the dtype parameter description below. +@param scale Optional scale factor for the matrix product. +@param dtype Optional type of the output matrix. When it is negative, +the output matrix will have the same type as src . Otherwise, it will be +type=CV_MAT_DEPTH(dtype) that should be either CV_32F or CV_64F . +@sa calcCovarMatrix, gemm, repeat, reduce +*/ +CV_EXPORTS_W void mulTransposed( InputArray src, OutputArray dst, bool aTa, + InputArray delta = noArray(), + double scale = 1, int dtype = -1 ); + +/** @brief Transposes a matrix. + +The function cv::transpose transposes the matrix src : +\f[\texttt{dst} (i,j) = \texttt{src} (j,i)\f] +@note No complex conjugation is done in case of a complex matrix. It +should be done separately if needed. +@param src input array. +@param dst output array of the same type as src. +*/ +CV_EXPORTS_W void transpose(InputArray src, OutputArray dst); + +/** @brief Performs the matrix transformation of every array element. + +The function cv::transform performs the matrix transformation of every +element of the array src and stores the results in dst : +\f[\texttt{dst} (I) = \texttt{m} \cdot \texttt{src} (I)\f] +(when m.cols=src.channels() ), or +\f[\texttt{dst} (I) = \texttt{m} \cdot [ \texttt{src} (I); 1]\f] +(when m.cols=src.channels()+1 ) + +Every element of the N -channel array src is interpreted as N -element +vector that is transformed using the M x N or M x (N+1) matrix m to +M-element vector - the corresponding element of the output array dst . + +The function may be used for geometrical transformation of +N -dimensional points, arbitrary linear color space transformation (such +as various kinds of RGB to YUV transforms), shuffling the image +channels, and so forth. +@param src input array that must have as many channels (1 to 4) as +m.cols or m.cols-1. +@param dst output array of the same size and depth as src; it has as +many channels as m.rows. +@param m transformation 2x2 or 2x3 floating-point matrix. +@sa perspectiveTransform, getAffineTransform, estimateAffine2D, warpAffine, warpPerspective +*/ +CV_EXPORTS_W void transform(InputArray src, OutputArray dst, InputArray m ); + +/** @brief Performs the perspective matrix transformation of vectors. + +The function cv::perspectiveTransform transforms every element of src by +treating it as a 2D or 3D vector, in the following way: +\f[(x, y, z) \rightarrow (x'/w, y'/w, z'/w)\f] +where +\f[(x', y', z', w') = \texttt{mat} \cdot \begin{bmatrix} x & y & z & 1 \end{bmatrix}\f] +and +\f[w = \fork{w'}{if \(w' \ne 0\)}{\infty}{otherwise}\f] + +Here a 3D vector transformation is shown. In case of a 2D vector +transformation, the z component is omitted. + +@note The function transforms a sparse set of 2D or 3D vectors. If you +want to transform an image using perspective transformation, use +warpPerspective . If you have an inverse problem, that is, you want to +compute the most probable perspective transformation out of several +pairs of corresponding points, you can use getPerspectiveTransform or +findHomography . +@param src input two-channel or three-channel floating-point array; each +element is a 2D/3D vector to be transformed. +@param dst output array of the same size and type as src. +@param m 3x3 or 4x4 floating-point transformation matrix. +@sa transform, warpPerspective, getPerspectiveTransform, findHomography +*/ +CV_EXPORTS_W void perspectiveTransform(InputArray src, OutputArray dst, InputArray m ); + +/** @brief Copies the lower or the upper half of a square matrix to its another half. + +The function cv::completeSymm copies the lower or the upper half of a square matrix to +its another half. The matrix diagonal remains unchanged: + - \f$\texttt{m}_{ij}=\texttt{m}_{ji}\f$ for \f$i > j\f$ if + lowerToUpper=false + - \f$\texttt{m}_{ij}=\texttt{m}_{ji}\f$ for \f$i < j\f$ if + lowerToUpper=true + +@param m input-output floating-point square matrix. +@param lowerToUpper operation flag; if true, the lower half is copied to +the upper half. Otherwise, the upper half is copied to the lower half. +@sa flip, transpose +*/ +CV_EXPORTS_W void completeSymm(InputOutputArray m, bool lowerToUpper = false); + +/** @brief Initializes a scaled identity matrix. + +The function cv::setIdentity initializes a scaled identity matrix: +\f[\texttt{mtx} (i,j)= \fork{\texttt{value}}{ if \(i=j\)}{0}{otherwise}\f] + +The function can also be emulated using the matrix initializers and the +matrix expressions: +@code + Mat A = Mat::eye(4, 3, CV_32F)*5; + // A will be set to [[5, 0, 0], [0, 5, 0], [0, 0, 5], [0, 0, 0]] +@endcode +@param mtx matrix to initialize (not necessarily square). +@param s value to assign to diagonal elements. +@sa Mat::zeros, Mat::ones, Mat::setTo, Mat::operator= +*/ +CV_EXPORTS_W void setIdentity(InputOutputArray mtx, const Scalar& s = Scalar(1)); + +/** @brief Returns the determinant of a square floating-point matrix. + +The function cv::determinant calculates and returns the determinant of the +specified matrix. For small matrices ( mtx.cols=mtx.rows\<=3 ), the +direct method is used. For larger matrices, the function uses LU +factorization with partial pivoting. + +For symmetric positively-determined matrices, it is also possible to use +eigen decomposition to calculate the determinant. +@param mtx input matrix that must have CV_32FC1 or CV_64FC1 type and +square size. +@sa trace, invert, solve, eigen, @ref MatrixExpressions +*/ +CV_EXPORTS_W double determinant(InputArray mtx); + +/** @brief Returns the trace of a matrix. + +The function cv::trace returns the sum of the diagonal elements of the +matrix mtx . +\f[\mathrm{tr} ( \texttt{mtx} ) = \sum _i \texttt{mtx} (i,i)\f] +@param mtx input matrix. +*/ +CV_EXPORTS_W Scalar trace(InputArray mtx); + +/** @brief Finds the inverse or pseudo-inverse of a matrix. + +The function cv::invert inverts the matrix src and stores the result in dst +. When the matrix src is singular or non-square, the function calculates +the pseudo-inverse matrix (the dst matrix) so that norm(src\*dst - I) is +minimal, where I is an identity matrix. + +In case of the #DECOMP_LU method, the function returns non-zero value if +the inverse has been successfully calculated and 0 if src is singular. + +In case of the #DECOMP_SVD method, the function returns the inverse +condition number of src (the ratio of the smallest singular value to the +largest singular value) and 0 if src is singular. The SVD method +calculates a pseudo-inverse matrix if src is singular. + +Similarly to #DECOMP_LU, the method #DECOMP_CHOLESKY works only with +non-singular square matrices that should also be symmetrical and +positively defined. In this case, the function stores the inverted +matrix in dst and returns non-zero. Otherwise, it returns 0. + +@param src input floating-point M x N matrix. +@param dst output matrix of N x M size and the same type as src. +@param flags inversion method (cv::DecompTypes) +@sa solve, SVD +*/ +CV_EXPORTS_W double invert(InputArray src, OutputArray dst, int flags = DECOMP_LU); + +/** @brief Solves one or more linear systems or least-squares problems. + +The function cv::solve solves a linear system or least-squares problem (the +latter is possible with SVD or QR methods, or by specifying the flag +#DECOMP_NORMAL ): +\f[\texttt{dst} = \arg \min _X \| \texttt{src1} \cdot \texttt{X} - \texttt{src2} \|\f] + +If #DECOMP_LU or #DECOMP_CHOLESKY method is used, the function returns 1 +if src1 (or \f$\texttt{src1}^T\texttt{src1}\f$ ) is non-singular. Otherwise, +it returns 0. In the latter case, dst is not valid. Other methods find a +pseudo-solution in case of a singular left-hand side part. + +@note If you want to find a unity-norm solution of an under-defined +singular system \f$\texttt{src1}\cdot\texttt{dst}=0\f$ , the function solve +will not do the work. Use SVD::solveZ instead. + +@param src1 input matrix on the left-hand side of the system. +@param src2 input matrix on the right-hand side of the system. +@param dst output solution. +@param flags solution (matrix inversion) method (#DecompTypes) +@sa invert, SVD, eigen +*/ +CV_EXPORTS_W bool solve(InputArray src1, InputArray src2, + OutputArray dst, int flags = DECOMP_LU); + +/** @brief Sorts each row or each column of a matrix. + +The function cv::sort sorts each matrix row or each matrix column in +ascending or descending order. So you should pass two operation flags to +get desired behaviour. If you want to sort matrix rows or columns +lexicographically, you can use STL std::sort generic function with the +proper comparison predicate. + +@param src input single-channel array. +@param dst output array of the same size and type as src. +@param flags operation flags, a combination of #SortFlags +@sa sortIdx, randShuffle +*/ +CV_EXPORTS_W void sort(InputArray src, OutputArray dst, int flags); + +/** @brief Sorts each row or each column of a matrix. + +The function cv::sortIdx sorts each matrix row or each matrix column in the +ascending or descending order. So you should pass two operation flags to +get desired behaviour. Instead of reordering the elements themselves, it +stores the indices of sorted elements in the output array. For example: +@code + Mat A = Mat::eye(3,3,CV_32F), B; + sortIdx(A, B, SORT_EVERY_ROW + SORT_ASCENDING); + // B will probably contain + // (because of equal elements in A some permutations are possible): + // [[1, 2, 0], [0, 2, 1], [0, 1, 2]] +@endcode +@param src input single-channel array. +@param dst output integer array of the same size as src. +@param flags operation flags that could be a combination of cv::SortFlags +@sa sort, randShuffle +*/ +CV_EXPORTS_W void sortIdx(InputArray src, OutputArray dst, int flags); + +/** @brief Finds the real roots of a cubic equation. + +The function solveCubic finds the real roots of a cubic equation: +- if coeffs is a 4-element vector: +\f[\texttt{coeffs} [0] x^3 + \texttt{coeffs} [1] x^2 + \texttt{coeffs} [2] x + \texttt{coeffs} [3] = 0\f] +- if coeffs is a 3-element vector: +\f[x^3 + \texttt{coeffs} [0] x^2 + \texttt{coeffs} [1] x + \texttt{coeffs} [2] = 0\f] + +The roots are stored in the roots array. +@param coeffs equation coefficients, an array of 3 or 4 elements. +@param roots output array of real roots that has 1 or 3 elements. +@return number of real roots. It can be 0, 1 or 2. +*/ +CV_EXPORTS_W int solveCubic(InputArray coeffs, OutputArray roots); + +/** @brief Finds the real or complex roots of a polynomial equation. + +The function cv::solvePoly finds real and complex roots of a polynomial equation: +\f[\texttt{coeffs} [n] x^{n} + \texttt{coeffs} [n-1] x^{n-1} + ... + \texttt{coeffs} [1] x + \texttt{coeffs} [0] = 0\f] +@param coeffs array of polynomial coefficients. +@param roots output (complex) array of roots. +@param maxIters maximum number of iterations the algorithm does. +*/ +CV_EXPORTS_W double solvePoly(InputArray coeffs, OutputArray roots, int maxIters = 300); + +/** @brief Calculates eigenvalues and eigenvectors of a symmetric matrix. + +The function cv::eigen calculates just eigenvalues, or eigenvalues and eigenvectors of the symmetric +matrix src: +@code + src*eigenvectors.row(i).t() = eigenvalues.at(i)*eigenvectors.row(i).t() +@endcode + +@note Use cv::eigenNonSymmetric for calculation of real eigenvalues and eigenvectors of non-symmetric matrix. + +@param src input matrix that must have CV_32FC1 or CV_64FC1 type, square size and be symmetrical +(src ^T^ == src). +@param eigenvalues output vector of eigenvalues of the same type as src; the eigenvalues are stored +in the descending order. +@param eigenvectors output matrix of eigenvectors; it has the same size and type as src; the +eigenvectors are stored as subsequent matrix rows, in the same order as the corresponding +eigenvalues. +@sa eigenNonSymmetric, completeSymm , PCA +*/ +CV_EXPORTS_W bool eigen(InputArray src, OutputArray eigenvalues, + OutputArray eigenvectors = noArray()); + +/** @brief Calculates eigenvalues and eigenvectors of a non-symmetric matrix (real eigenvalues only). + +@note Assumes real eigenvalues. + +The function calculates eigenvalues and eigenvectors (optional) of the square matrix src: +@code + src*eigenvectors.row(i).t() = eigenvalues.at(i)*eigenvectors.row(i).t() +@endcode + +@param src input matrix (CV_32FC1 or CV_64FC1 type). +@param eigenvalues output vector of eigenvalues (type is the same type as src). +@param eigenvectors output matrix of eigenvectors (type is the same type as src). The eigenvectors are stored as subsequent matrix rows, in the same order as the corresponding eigenvalues. +@sa eigen +*/ +CV_EXPORTS_W void eigenNonSymmetric(InputArray src, OutputArray eigenvalues, + OutputArray eigenvectors); + +/** @brief Calculates the covariance matrix of a set of vectors. + +The function cv::calcCovarMatrix calculates the covariance matrix and, optionally, the mean vector of +the set of input vectors. +@param samples samples stored as separate matrices +@param nsamples number of samples +@param covar output covariance matrix of the type ctype and square size. +@param mean input or output (depending on the flags) array as the average value of the input vectors. +@param flags operation flags as a combination of #CovarFlags +@param ctype type of the matrixl; it equals 'CV_64F' by default. +@sa PCA, mulTransposed, Mahalanobis +@todo InputArrayOfArrays +*/ +CV_EXPORTS void calcCovarMatrix( const Mat* samples, int nsamples, Mat& covar, Mat& mean, + int flags, int ctype = CV_64F); + +/** @overload +@note use #COVAR_ROWS or #COVAR_COLS flag +@param samples samples stored as rows/columns of a single matrix. +@param covar output covariance matrix of the type ctype and square size. +@param mean input or output (depending on the flags) array as the average value of the input vectors. +@param flags operation flags as a combination of #CovarFlags +@param ctype type of the matrixl; it equals 'CV_64F' by default. +*/ +CV_EXPORTS_W void calcCovarMatrix( InputArray samples, OutputArray covar, + InputOutputArray mean, int flags, int ctype = CV_64F); + +/** wrap PCA::operator() */ +CV_EXPORTS_W void PCACompute(InputArray data, InputOutputArray mean, + OutputArray eigenvectors, int maxComponents = 0); + +/** wrap PCA::operator() and add eigenvalues output parameter */ +CV_EXPORTS_AS(PCACompute2) void PCACompute(InputArray data, InputOutputArray mean, + OutputArray eigenvectors, OutputArray eigenvalues, + int maxComponents = 0); + +/** wrap PCA::operator() */ +CV_EXPORTS_W void PCACompute(InputArray data, InputOutputArray mean, + OutputArray eigenvectors, double retainedVariance); + +/** wrap PCA::operator() and add eigenvalues output parameter */ +CV_EXPORTS_AS(PCACompute2) void PCACompute(InputArray data, InputOutputArray mean, + OutputArray eigenvectors, OutputArray eigenvalues, + double retainedVariance); + +/** wrap PCA::project */ +CV_EXPORTS_W void PCAProject(InputArray data, InputArray mean, + InputArray eigenvectors, OutputArray result); + +/** wrap PCA::backProject */ +CV_EXPORTS_W void PCABackProject(InputArray data, InputArray mean, + InputArray eigenvectors, OutputArray result); + +/** wrap SVD::compute */ +CV_EXPORTS_W void SVDecomp( InputArray src, OutputArray w, OutputArray u, OutputArray vt, int flags = 0 ); + +/** wrap SVD::backSubst */ +CV_EXPORTS_W void SVBackSubst( InputArray w, InputArray u, InputArray vt, + InputArray rhs, OutputArray dst ); + +/** @brief Calculates the Mahalanobis distance between two vectors. + +The function cv::Mahalanobis calculates and returns the weighted distance between two vectors: +\f[d( \texttt{vec1} , \texttt{vec2} )= \sqrt{\sum_{i,j}{\texttt{icovar(i,j)}\cdot(\texttt{vec1}(I)-\texttt{vec2}(I))\cdot(\texttt{vec1(j)}-\texttt{vec2(j)})} }\f] +The covariance matrix may be calculated using the #calcCovarMatrix function and then inverted using +the invert function (preferably using the #DECOMP_SVD method, as the most accurate). +@param v1 first 1D input vector. +@param v2 second 1D input vector. +@param icovar inverse covariance matrix. +*/ +CV_EXPORTS_W double Mahalanobis(InputArray v1, InputArray v2, InputArray icovar); + +/** @brief Performs a forward or inverse Discrete Fourier transform of a 1D or 2D floating-point array. + +The function cv::dft performs one of the following: +- Forward the Fourier transform of a 1D vector of N elements: + \f[Y = F^{(N)} \cdot X,\f] + where \f$F^{(N)}_{jk}=\exp(-2\pi i j k/N)\f$ and \f$i=\sqrt{-1}\f$ +- Inverse the Fourier transform of a 1D vector of N elements: + \f[\begin{array}{l} X'= \left (F^{(N)} \right )^{-1} \cdot Y = \left (F^{(N)} \right )^* \cdot y \\ X = (1/N) \cdot X, \end{array}\f] + where \f$F^*=\left(\textrm{Re}(F^{(N)})-\textrm{Im}(F^{(N)})\right)^T\f$ +- Forward the 2D Fourier transform of a M x N matrix: + \f[Y = F^{(M)} \cdot X \cdot F^{(N)}\f] +- Inverse the 2D Fourier transform of a M x N matrix: + \f[\begin{array}{l} X'= \left (F^{(M)} \right )^* \cdot Y \cdot \left (F^{(N)} \right )^* \\ X = \frac{1}{M \cdot N} \cdot X' \end{array}\f] + +In case of real (single-channel) data, the output spectrum of the forward Fourier transform or input +spectrum of the inverse Fourier transform can be represented in a packed format called *CCS* +(complex-conjugate-symmetrical). It was borrowed from IPL (Intel\* Image Processing Library). Here +is how 2D *CCS* spectrum looks: +\f[\begin{bmatrix} Re Y_{0,0} & Re Y_{0,1} & Im Y_{0,1} & Re Y_{0,2} & Im Y_{0,2} & \cdots & Re Y_{0,N/2-1} & Im Y_{0,N/2-1} & Re Y_{0,N/2} \\ Re Y_{1,0} & Re Y_{1,1} & Im Y_{1,1} & Re Y_{1,2} & Im Y_{1,2} & \cdots & Re Y_{1,N/2-1} & Im Y_{1,N/2-1} & Re Y_{1,N/2} \\ Im Y_{1,0} & Re Y_{2,1} & Im Y_{2,1} & Re Y_{2,2} & Im Y_{2,2} & \cdots & Re Y_{2,N/2-1} & Im Y_{2,N/2-1} & Im Y_{1,N/2} \\ \hdotsfor{9} \\ Re Y_{M/2-1,0} & Re Y_{M-3,1} & Im Y_{M-3,1} & \hdotsfor{3} & Re Y_{M-3,N/2-1} & Im Y_{M-3,N/2-1}& Re Y_{M/2-1,N/2} \\ Im Y_{M/2-1,0} & Re Y_{M-2,1} & Im Y_{M-2,1} & \hdotsfor{3} & Re Y_{M-2,N/2-1} & Im Y_{M-2,N/2-1}& Im Y_{M/2-1,N/2} \\ Re Y_{M/2,0} & Re Y_{M-1,1} & Im Y_{M-1,1} & \hdotsfor{3} & Re Y_{M-1,N/2-1} & Im Y_{M-1,N/2-1}& Re Y_{M/2,N/2} \end{bmatrix}\f] + +In case of 1D transform of a real vector, the output looks like the first row of the matrix above. + +So, the function chooses an operation mode depending on the flags and size of the input array: +- If #DFT_ROWS is set or the input array has a single row or single column, the function + performs a 1D forward or inverse transform of each row of a matrix when #DFT_ROWS is set. + Otherwise, it performs a 2D transform. +- If the input array is real and #DFT_INVERSE is not set, the function performs a forward 1D or + 2D transform: + - When #DFT_COMPLEX_OUTPUT is set, the output is a complex matrix of the same size as + input. + - When #DFT_COMPLEX_OUTPUT is not set, the output is a real matrix of the same size as + input. In case of 2D transform, it uses the packed format as shown above. In case of a + single 1D transform, it looks like the first row of the matrix above. In case of + multiple 1D transforms (when using the #DFT_ROWS flag), each row of the output matrix + looks like the first row of the matrix above. +- If the input array is complex and either #DFT_INVERSE or #DFT_REAL_OUTPUT are not set, the + output is a complex array of the same size as input. The function performs a forward or + inverse 1D or 2D transform of the whole input array or each row of the input array + independently, depending on the flags DFT_INVERSE and DFT_ROWS. +- When #DFT_INVERSE is set and the input array is real, or it is complex but #DFT_REAL_OUTPUT + is set, the output is a real array of the same size as input. The function performs a 1D or 2D + inverse transformation of the whole input array or each individual row, depending on the flags + #DFT_INVERSE and #DFT_ROWS. + +If #DFT_SCALE is set, the scaling is done after the transformation. + +Unlike dct , the function supports arrays of arbitrary size. But only those arrays are processed +efficiently, whose sizes can be factorized in a product of small prime numbers (2, 3, and 5 in the +current implementation). Such an efficient DFT size can be calculated using the getOptimalDFTSize +method. + +The sample below illustrates how to calculate a DFT-based convolution of two 2D real arrays: +@code + void convolveDFT(InputArray A, InputArray B, OutputArray C) + { + // reallocate the output array if needed + C.create(abs(A.rows - B.rows)+1, abs(A.cols - B.cols)+1, A.type()); + Size dftSize; + // calculate the size of DFT transform + dftSize.width = getOptimalDFTSize(A.cols + B.cols - 1); + dftSize.height = getOptimalDFTSize(A.rows + B.rows - 1); + + // allocate temporary buffers and initialize them with 0's + Mat tempA(dftSize, A.type(), Scalar::all(0)); + Mat tempB(dftSize, B.type(), Scalar::all(0)); + + // copy A and B to the top-left corners of tempA and tempB, respectively + Mat roiA(tempA, Rect(0,0,A.cols,A.rows)); + A.copyTo(roiA); + Mat roiB(tempB, Rect(0,0,B.cols,B.rows)); + B.copyTo(roiB); + + // now transform the padded A & B in-place; + // use "nonzeroRows" hint for faster processing + dft(tempA, tempA, 0, A.rows); + dft(tempB, tempB, 0, B.rows); + + // multiply the spectrums; + // the function handles packed spectrum representations well + mulSpectrums(tempA, tempB, tempA); + + // transform the product back from the frequency domain. + // Even though all the result rows will be non-zero, + // you need only the first C.rows of them, and thus you + // pass nonzeroRows == C.rows + dft(tempA, tempA, DFT_INVERSE + DFT_SCALE, C.rows); + + // now copy the result back to C. + tempA(Rect(0, 0, C.cols, C.rows)).copyTo(C); + + // all the temporary buffers will be deallocated automatically + } +@endcode +To optimize this sample, consider the following approaches: +- Since nonzeroRows != 0 is passed to the forward transform calls and since A and B are copied to + the top-left corners of tempA and tempB, respectively, it is not necessary to clear the whole + tempA and tempB. It is only necessary to clear the tempA.cols - A.cols ( tempB.cols - B.cols) + rightmost columns of the matrices. +- This DFT-based convolution does not have to be applied to the whole big arrays, especially if B + is significantly smaller than A or vice versa. Instead, you can calculate convolution by parts. + To do this, you need to split the output array C into multiple tiles. For each tile, estimate + which parts of A and B are required to calculate convolution in this tile. If the tiles in C are + too small, the speed will decrease a lot because of repeated work. In the ultimate case, when + each tile in C is a single pixel, the algorithm becomes equivalent to the naive convolution + algorithm. If the tiles are too big, the temporary arrays tempA and tempB become too big and + there is also a slowdown because of bad cache locality. So, there is an optimal tile size + somewhere in the middle. +- If different tiles in C can be calculated in parallel and, thus, the convolution is done by + parts, the loop can be threaded. + +All of the above improvements have been implemented in #matchTemplate and #filter2D . Therefore, by +using them, you can get the performance even better than with the above theoretically optimal +implementation. Though, those two functions actually calculate cross-correlation, not convolution, +so you need to "flip" the second convolution operand B vertically and horizontally using flip . +@note +- An example using the discrete fourier transform can be found at + opencv_source_code/samples/cpp/dft.cpp +- (Python) An example using the dft functionality to perform Wiener deconvolution can be found + at opencv_source/samples/python/deconvolution.py +- (Python) An example rearranging the quadrants of a Fourier image can be found at + opencv_source/samples/python/dft.py +@param src input array that could be real or complex. +@param dst output array whose size and type depends on the flags . +@param flags transformation flags, representing a combination of the #DftFlags +@param nonzeroRows when the parameter is not zero, the function assumes that only the first +nonzeroRows rows of the input array (#DFT_INVERSE is not set) or only the first nonzeroRows of the +output array (#DFT_INVERSE is set) contain non-zeros, thus, the function can handle the rest of the +rows more efficiently and save some time; this technique is very useful for calculating array +cross-correlation or convolution using DFT. +@sa dct , getOptimalDFTSize , mulSpectrums, filter2D , matchTemplate , flip , cartToPolar , +magnitude , phase +*/ +CV_EXPORTS_W void dft(InputArray src, OutputArray dst, int flags = 0, int nonzeroRows = 0); + +/** @brief Calculates the inverse Discrete Fourier Transform of a 1D or 2D array. + +idft(src, dst, flags) is equivalent to dft(src, dst, flags | #DFT_INVERSE) . +@note None of dft and idft scales the result by default. So, you should pass #DFT_SCALE to one of +dft or idft explicitly to make these transforms mutually inverse. +@sa dft, dct, idct, mulSpectrums, getOptimalDFTSize +@param src input floating-point real or complex array. +@param dst output array whose size and type depend on the flags. +@param flags operation flags (see dft and #DftFlags). +@param nonzeroRows number of dst rows to process; the rest of the rows have undefined content (see +the convolution sample in dft description. +*/ +CV_EXPORTS_W void idft(InputArray src, OutputArray dst, int flags = 0, int nonzeroRows = 0); + +/** @brief Performs a forward or inverse discrete Cosine transform of 1D or 2D array. + +The function cv::dct performs a forward or inverse discrete Cosine transform (DCT) of a 1D or 2D +floating-point array: +- Forward Cosine transform of a 1D vector of N elements: + \f[Y = C^{(N)} \cdot X\f] + where + \f[C^{(N)}_{jk}= \sqrt{\alpha_j/N} \cos \left ( \frac{\pi(2k+1)j}{2N} \right )\f] + and + \f$\alpha_0=1\f$, \f$\alpha_j=2\f$ for *j \> 0*. +- Inverse Cosine transform of a 1D vector of N elements: + \f[X = \left (C^{(N)} \right )^{-1} \cdot Y = \left (C^{(N)} \right )^T \cdot Y\f] + (since \f$C^{(N)}\f$ is an orthogonal matrix, \f$C^{(N)} \cdot \left(C^{(N)}\right)^T = I\f$ ) +- Forward 2D Cosine transform of M x N matrix: + \f[Y = C^{(N)} \cdot X \cdot \left (C^{(N)} \right )^T\f] +- Inverse 2D Cosine transform of M x N matrix: + \f[X = \left (C^{(N)} \right )^T \cdot X \cdot C^{(N)}\f] + +The function chooses the mode of operation by looking at the flags and size of the input array: +- If (flags & #DCT_INVERSE) == 0 , the function does a forward 1D or 2D transform. Otherwise, it + is an inverse 1D or 2D transform. +- If (flags & #DCT_ROWS) != 0 , the function performs a 1D transform of each row. +- If the array is a single column or a single row, the function performs a 1D transform. +- If none of the above is true, the function performs a 2D transform. + +@note Currently dct supports even-size arrays (2, 4, 6 ...). For data analysis and approximation, you +can pad the array when necessary. +Also, the function performance depends very much, and not monotonically, on the array size (see +getOptimalDFTSize ). In the current implementation DCT of a vector of size N is calculated via DFT +of a vector of size N/2 . Thus, the optimal DCT size N1 \>= N can be calculated as: +@code + size_t getOptimalDCTSize(size_t N) { return 2*getOptimalDFTSize((N+1)/2); } + N1 = getOptimalDCTSize(N); +@endcode +@param src input floating-point array. +@param dst output array of the same size and type as src . +@param flags transformation flags as a combination of cv::DftFlags (DCT_*) +@sa dft , getOptimalDFTSize , idct +*/ +CV_EXPORTS_W void dct(InputArray src, OutputArray dst, int flags = 0); + +/** @brief Calculates the inverse Discrete Cosine Transform of a 1D or 2D array. + +idct(src, dst, flags) is equivalent to dct(src, dst, flags | DCT_INVERSE). +@param src input floating-point single-channel array. +@param dst output array of the same size and type as src. +@param flags operation flags. +@sa dct, dft, idft, getOptimalDFTSize +*/ +CV_EXPORTS_W void idct(InputArray src, OutputArray dst, int flags = 0); + +/** @brief Performs the per-element multiplication of two Fourier spectrums. + +The function cv::mulSpectrums performs the per-element multiplication of the two CCS-packed or complex +matrices that are results of a real or complex Fourier transform. + +The function, together with dft and idft , may be used to calculate convolution (pass conjB=false ) +or correlation (pass conjB=true ) of two arrays rapidly. When the arrays are complex, they are +simply multiplied (per element) with an optional conjugation of the second-array elements. When the +arrays are real, they are assumed to be CCS-packed (see dft for details). +@param a first input array. +@param b second input array of the same size and type as src1 . +@param c output array of the same size and type as src1 . +@param flags operation flags; currently, the only supported flag is cv::DFT_ROWS, which indicates that +each row of src1 and src2 is an independent 1D Fourier spectrum. If you do not want to use this flag, then simply add a `0` as value. +@param conjB optional flag that conjugates the second input array before the multiplication (true) +or not (false). +*/ +CV_EXPORTS_W void mulSpectrums(InputArray a, InputArray b, OutputArray c, + int flags, bool conjB = false); + +/** @brief Returns the optimal DFT size for a given vector size. + +DFT performance is not a monotonic function of a vector size. Therefore, when you calculate +convolution of two arrays or perform the spectral analysis of an array, it usually makes sense to +pad the input data with zeros to get a bit larger array that can be transformed much faster than the +original one. Arrays whose size is a power-of-two (2, 4, 8, 16, 32, ...) are the fastest to process. +Though, the arrays whose size is a product of 2's, 3's, and 5's (for example, 300 = 5\*5\*3\*2\*2) +are also processed quite efficiently. + +The function cv::getOptimalDFTSize returns the minimum number N that is greater than or equal to vecsize +so that the DFT of a vector of size N can be processed efficiently. In the current implementation N += 2 ^p^ \* 3 ^q^ \* 5 ^r^ for some integer p, q, r. + +The function returns a negative number if vecsize is too large (very close to INT_MAX ). + +While the function cannot be used directly to estimate the optimal vector size for DCT transform +(since the current DCT implementation supports only even-size vectors), it can be easily processed +as getOptimalDFTSize((vecsize+1)/2)\*2. +@param vecsize vector size. +@sa dft , dct , idft , idct , mulSpectrums +*/ +CV_EXPORTS_W int getOptimalDFTSize(int vecsize); + +/** @brief Returns the default random number generator. + +The function cv::theRNG returns the default random number generator. For each thread, there is a +separate random number generator, so you can use the function safely in multi-thread environments. +If you just need to get a single random number using this generator or initialize an array, you can +use randu or randn instead. But if you are going to generate many random numbers inside a loop, it +is much faster to use this function to retrieve the generator and then use RNG::operator _Tp() . +@sa RNG, randu, randn +*/ +CV_EXPORTS RNG& theRNG(); + +/** @brief Sets state of default random number generator. + +The function cv::setRNGSeed sets state of default random number generator to custom value. +@param seed new state for default random number generator +@sa RNG, randu, randn +*/ +CV_EXPORTS_W void setRNGSeed(int seed); + +/** @brief Generates a single uniformly-distributed random number or an array of random numbers. + +Non-template variant of the function fills the matrix dst with uniformly-distributed +random numbers from the specified range: +\f[\texttt{low} _c \leq \texttt{dst} (I)_c < \texttt{high} _c\f] +@param dst output array of random numbers; the array must be pre-allocated. +@param low inclusive lower boundary of the generated random numbers. +@param high exclusive upper boundary of the generated random numbers. +@sa RNG, randn, theRNG +*/ +CV_EXPORTS_W void randu(InputOutputArray dst, InputArray low, InputArray high); + +/** @brief Fills the array with normally distributed random numbers. + +The function cv::randn fills the matrix dst with normally distributed random numbers with the specified +mean vector and the standard deviation matrix. The generated random numbers are clipped to fit the +value range of the output array data type. +@param dst output array of random numbers; the array must be pre-allocated and have 1 to 4 channels. +@param mean mean value (expectation) of the generated random numbers. +@param stddev standard deviation of the generated random numbers; it can be either a vector (in +which case a diagonal standard deviation matrix is assumed) or a square matrix. +@sa RNG, randu +*/ +CV_EXPORTS_W void randn(InputOutputArray dst, InputArray mean, InputArray stddev); + +/** @brief Shuffles the array elements randomly. + +The function cv::randShuffle shuffles the specified 1D array by randomly choosing pairs of elements and +swapping them. The number of such swap operations will be dst.rows\*dst.cols\*iterFactor . +@param dst input/output numerical 1D array. +@param iterFactor scale factor that determines the number of random swap operations (see the details +below). +@param rng optional random number generator used for shuffling; if it is zero, theRNG () is used +instead. +@sa RNG, sort +*/ +CV_EXPORTS_W void randShuffle(InputOutputArray dst, double iterFactor = 1., RNG* rng = 0); + +/** @brief Principal Component Analysis + +The class is used to calculate a special basis for a set of vectors. The +basis will consist of eigenvectors of the covariance matrix calculated +from the input set of vectors. The class %PCA can also transform +vectors to/from the new coordinate space defined by the basis. Usually, +in this new coordinate system, each vector from the original set (and +any linear combination of such vectors) can be quite accurately +approximated by taking its first few components, corresponding to the +eigenvectors of the largest eigenvalues of the covariance matrix. +Geometrically it means that you calculate a projection of the vector to +a subspace formed by a few eigenvectors corresponding to the dominant +eigenvalues of the covariance matrix. And usually such a projection is +very close to the original vector. So, you can represent the original +vector from a high-dimensional space with a much shorter vector +consisting of the projected vector's coordinates in the subspace. Such a +transformation is also known as Karhunen-Loeve Transform, or KLT. +See http://en.wikipedia.org/wiki/Principal_component_analysis + +The sample below is the function that takes two matrices. The first +function stores a set of vectors (a row per vector) that is used to +calculate PCA. The second function stores another "test" set of vectors +(a row per vector). First, these vectors are compressed with PCA, then +reconstructed back, and then the reconstruction error norm is computed +and printed for each vector. : + +@code{.cpp} +using namespace cv; + +PCA compressPCA(const Mat& pcaset, int maxComponents, + const Mat& testset, Mat& compressed) +{ + PCA pca(pcaset, // pass the data + Mat(), // we do not have a pre-computed mean vector, + // so let the PCA engine to compute it + PCA::DATA_AS_ROW, // indicate that the vectors + // are stored as matrix rows + // (use PCA::DATA_AS_COL if the vectors are + // the matrix columns) + maxComponents // specify, how many principal components to retain + ); + // if there is no test data, just return the computed basis, ready-to-use + if( !testset.data ) + return pca; + CV_Assert( testset.cols == pcaset.cols ); + + compressed.create(testset.rows, maxComponents, testset.type()); + + Mat reconstructed; + for( int i = 0; i < testset.rows; i++ ) + { + Mat vec = testset.row(i), coeffs = compressed.row(i), reconstructed; + // compress the vector, the result will be stored + // in the i-th row of the output matrix + pca.project(vec, coeffs); + // and then reconstruct it + pca.backProject(coeffs, reconstructed); + // and measure the error + printf("%d. diff = %g\n", i, norm(vec, reconstructed, NORM_L2)); + } + return pca; +} +@endcode +@sa calcCovarMatrix, mulTransposed, SVD, dft, dct +*/ +class CV_EXPORTS PCA +{ +public: + enum Flags { DATA_AS_ROW = 0, //!< indicates that the input samples are stored as matrix rows + DATA_AS_COL = 1, //!< indicates that the input samples are stored as matrix columns + USE_AVG = 2 //! + }; + + /** @brief default constructor + + The default constructor initializes an empty %PCA structure. The other + constructors initialize the structure and call PCA::operator()(). + */ + PCA(); + + /** @overload + @param data input samples stored as matrix rows or matrix columns. + @param mean optional mean value; if the matrix is empty (@c noArray()), + the mean is computed from the data. + @param flags operation flags; currently the parameter is only used to + specify the data layout (PCA::Flags) + @param maxComponents maximum number of components that %PCA should + retain; by default, all the components are retained. + */ + PCA(InputArray data, InputArray mean, int flags, int maxComponents = 0); + + /** @overload + @param data input samples stored as matrix rows or matrix columns. + @param mean optional mean value; if the matrix is empty (noArray()), + the mean is computed from the data. + @param flags operation flags; currently the parameter is only used to + specify the data layout (PCA::Flags) + @param retainedVariance Percentage of variance that PCA should retain. + Using this parameter will let the PCA decided how many components to + retain but it will always keep at least 2. + */ + PCA(InputArray data, InputArray mean, int flags, double retainedVariance); + + /** @brief performs %PCA + + The operator performs %PCA of the supplied dataset. It is safe to reuse + the same PCA structure for multiple datasets. That is, if the structure + has been previously used with another dataset, the existing internal + data is reclaimed and the new @ref eigenvalues, @ref eigenvectors and @ref + mean are allocated and computed. + + The computed @ref eigenvalues are sorted from the largest to the smallest and + the corresponding @ref eigenvectors are stored as eigenvectors rows. + + @param data input samples stored as the matrix rows or as the matrix + columns. + @param mean optional mean value; if the matrix is empty (noArray()), + the mean is computed from the data. + @param flags operation flags; currently the parameter is only used to + specify the data layout. (Flags) + @param maxComponents maximum number of components that PCA should + retain; by default, all the components are retained. + */ + PCA& operator()(InputArray data, InputArray mean, int flags, int maxComponents = 0); + + /** @overload + @param data input samples stored as the matrix rows or as the matrix + columns. + @param mean optional mean value; if the matrix is empty (noArray()), + the mean is computed from the data. + @param flags operation flags; currently the parameter is only used to + specify the data layout. (PCA::Flags) + @param retainedVariance Percentage of variance that %PCA should retain. + Using this parameter will let the %PCA decided how many components to + retain but it will always keep at least 2. + */ + PCA& operator()(InputArray data, InputArray mean, int flags, double retainedVariance); + + /** @brief Projects vector(s) to the principal component subspace. + + The methods project one or more vectors to the principal component + subspace, where each vector projection is represented by coefficients in + the principal component basis. The first form of the method returns the + matrix that the second form writes to the result. So the first form can + be used as a part of expression while the second form can be more + efficient in a processing loop. + @param vec input vector(s); must have the same dimensionality and the + same layout as the input data used at %PCA phase, that is, if + DATA_AS_ROW are specified, then `vec.cols==data.cols` + (vector dimensionality) and `vec.rows` is the number of vectors to + project, and the same is true for the PCA::DATA_AS_COL case. + */ + Mat project(InputArray vec) const; + + /** @overload + @param vec input vector(s); must have the same dimensionality and the + same layout as the input data used at PCA phase, that is, if + DATA_AS_ROW are specified, then `vec.cols==data.cols` + (vector dimensionality) and `vec.rows` is the number of vectors to + project, and the same is true for the PCA::DATA_AS_COL case. + @param result output vectors; in case of PCA::DATA_AS_COL, the + output matrix has as many columns as the number of input vectors, this + means that `result.cols==vec.cols` and the number of rows match the + number of principal components (for example, `maxComponents` parameter + passed to the constructor). + */ + void project(InputArray vec, OutputArray result) const; + + /** @brief Reconstructs vectors from their PC projections. + + The methods are inverse operations to PCA::project. They take PC + coordinates of projected vectors and reconstruct the original vectors. + Unless all the principal components have been retained, the + reconstructed vectors are different from the originals. But typically, + the difference is small if the number of components is large enough (but + still much smaller than the original vector dimensionality). As a + result, PCA is used. + @param vec coordinates of the vectors in the principal component + subspace, the layout and size are the same as of PCA::project output + vectors. + */ + Mat backProject(InputArray vec) const; + + /** @overload + @param vec coordinates of the vectors in the principal component + subspace, the layout and size are the same as of PCA::project output + vectors. + @param result reconstructed vectors; the layout and size are the same as + of PCA::project input vectors. + */ + void backProject(InputArray vec, OutputArray result) const; + + /** @brief write PCA objects + + Writes @ref eigenvalues @ref eigenvectors and @ref mean to specified FileStorage + */ + void write(FileStorage& fs) const; + + /** @brief load PCA objects + + Loads @ref eigenvalues @ref eigenvectors and @ref mean from specified FileNode + */ + void read(const FileNode& fn); + + Mat eigenvectors; //!< eigenvectors of the covariation matrix + Mat eigenvalues; //!< eigenvalues of the covariation matrix + Mat mean; //!< mean value subtracted before the projection and added after the back projection +}; + +/** @example samples/cpp/pca.cpp +An example using %PCA for dimensionality reduction while maintaining an amount of variance +*/ + +/** @example samples/cpp/tutorial_code/ml/introduction_to_pca/introduction_to_pca.cpp +Check @ref tutorial_introduction_to_pca "the corresponding tutorial" for more details +*/ + +/** +@brief Linear Discriminant Analysis +@todo document this class +*/ +class CV_EXPORTS LDA +{ +public: + /** @brief constructor + Initializes a LDA with num_components (default 0). + */ + explicit LDA(int num_components = 0); + + /** Initializes and performs a Discriminant Analysis with Fisher's + Optimization Criterion on given data in src and corresponding labels + in labels. If 0 (or less) number of components are given, they are + automatically determined for given data in computation. + */ + LDA(InputArrayOfArrays src, InputArray labels, int num_components = 0); + + /** Serializes this object to a given filename. + */ + void save(const String& filename) const; + + /** Deserializes this object from a given filename. + */ + void load(const String& filename); + + /** Serializes this object to a given cv::FileStorage. + */ + void save(FileStorage& fs) const; + + /** Deserializes this object from a given cv::FileStorage. + */ + void load(const FileStorage& node); + + /** destructor + */ + ~LDA(); + + /** Compute the discriminants for data in src (row aligned) and labels. + */ + void compute(InputArrayOfArrays src, InputArray labels); + + /** Projects samples into the LDA subspace. + src may be one or more row aligned samples. + */ + Mat project(InputArray src); + + /** Reconstructs projections from the LDA subspace. + src may be one or more row aligned projections. + */ + Mat reconstruct(InputArray src); + + /** Returns the eigenvectors of this LDA. + */ + Mat eigenvectors() const { return _eigenvectors; } + + /** Returns the eigenvalues of this LDA. + */ + Mat eigenvalues() const { return _eigenvalues; } + + static Mat subspaceProject(InputArray W, InputArray mean, InputArray src); + static Mat subspaceReconstruct(InputArray W, InputArray mean, InputArray src); + +protected: + int _num_components; + Mat _eigenvectors; + Mat _eigenvalues; + void lda(InputArrayOfArrays src, InputArray labels); +}; + +/** @brief Singular Value Decomposition + +Class for computing Singular Value Decomposition of a floating-point +matrix. The Singular Value Decomposition is used to solve least-square +problems, under-determined linear systems, invert matrices, compute +condition numbers, and so on. + +If you want to compute a condition number of a matrix or an absolute value of +its determinant, you do not need `u` and `vt`. You can pass +flags=SVD::NO_UV|... . Another flag SVD::FULL_UV indicates that full-size u +and vt must be computed, which is not necessary most of the time. + +@sa invert, solve, eigen, determinant +*/ +class CV_EXPORTS SVD +{ +public: + enum Flags { + /** allow the algorithm to modify the decomposed matrix; it can save space and speed up + processing. currently ignored. */ + MODIFY_A = 1, + /** indicates that only a vector of singular values `w` is to be processed, while u and vt + will be set to empty matrices */ + NO_UV = 2, + /** when the matrix is not square, by default the algorithm produces u and vt matrices of + sufficiently large size for the further A reconstruction; if, however, FULL_UV flag is + specified, u and vt will be full-size square orthogonal matrices.*/ + FULL_UV = 4 + }; + + /** @brief the default constructor + + initializes an empty SVD structure + */ + SVD(); + + /** @overload + initializes an empty SVD structure and then calls SVD::operator() + @param src decomposed matrix. The depth has to be CV_32F or CV_64F. + @param flags operation flags (SVD::Flags) + */ + SVD( InputArray src, int flags = 0 ); + + /** @brief the operator that performs SVD. The previously allocated u, w and vt are released. + + The operator performs the singular value decomposition of the supplied + matrix. The u,`vt` , and the vector of singular values w are stored in + the structure. The same SVD structure can be reused many times with + different matrices. Each time, if needed, the previous u,`vt` , and w + are reclaimed and the new matrices are created, which is all handled by + Mat::create. + @param src decomposed matrix. The depth has to be CV_32F or CV_64F. + @param flags operation flags (SVD::Flags) + */ + SVD& operator ()( InputArray src, int flags = 0 ); + + /** @brief decomposes matrix and stores the results to user-provided matrices + + The methods/functions perform SVD of matrix. Unlike SVD::SVD constructor + and SVD::operator(), they store the results to the user-provided + matrices: + + @code{.cpp} + Mat A, w, u, vt; + SVD::compute(A, w, u, vt); + @endcode + + @param src decomposed matrix. The depth has to be CV_32F or CV_64F. + @param w calculated singular values + @param u calculated left singular vectors + @param vt transposed matrix of right singular vectors + @param flags operation flags - see SVD::Flags. + */ + static void compute( InputArray src, OutputArray w, + OutputArray u, OutputArray vt, int flags = 0 ); + + /** @overload + computes singular values of a matrix + @param src decomposed matrix. The depth has to be CV_32F or CV_64F. + @param w calculated singular values + @param flags operation flags - see SVD::Flags. + */ + static void compute( InputArray src, OutputArray w, int flags = 0 ); + + /** @brief performs back substitution + */ + static void backSubst( InputArray w, InputArray u, + InputArray vt, InputArray rhs, + OutputArray dst ); + + /** @brief solves an under-determined singular linear system + + The method finds a unit-length solution x of a singular linear system + A\*x = 0. Depending on the rank of A, there can be no solutions, a + single solution or an infinite number of solutions. In general, the + algorithm solves the following problem: + \f[dst = \arg \min _{x: \| x \| =1} \| src \cdot x \|\f] + @param src left-hand-side matrix. + @param dst found solution. + */ + static void solveZ( InputArray src, OutputArray dst ); + + /** @brief performs a singular value back substitution. + + The method calculates a back substitution for the specified right-hand + side: + + \f[\texttt{x} = \texttt{vt} ^T \cdot diag( \texttt{w} )^{-1} \cdot \texttt{u} ^T \cdot \texttt{rhs} \sim \texttt{A} ^{-1} \cdot \texttt{rhs}\f] + + Using this technique you can either get a very accurate solution of the + convenient linear system, or the best (in the least-squares terms) + pseudo-solution of an overdetermined linear system. + + @param rhs right-hand side of a linear system (u\*w\*v')\*dst = rhs to + be solved, where A has been previously decomposed. + + @param dst found solution of the system. + + @note Explicit SVD with the further back substitution only makes sense + if you need to solve many linear systems with the same left-hand side + (for example, src ). If all you need is to solve a single system + (possibly with multiple rhs immediately available), simply call solve + add pass #DECOMP_SVD there. It does absolutely the same thing. + */ + void backSubst( InputArray rhs, OutputArray dst ) const; + + /** @todo document */ + template static + void compute( const Matx<_Tp, m, n>& a, Matx<_Tp, nm, 1>& w, Matx<_Tp, m, nm>& u, Matx<_Tp, n, nm>& vt ); + + /** @todo document */ + template static + void compute( const Matx<_Tp, m, n>& a, Matx<_Tp, nm, 1>& w ); + + /** @todo document */ + template static + void backSubst( const Matx<_Tp, nm, 1>& w, const Matx<_Tp, m, nm>& u, const Matx<_Tp, n, nm>& vt, const Matx<_Tp, m, nb>& rhs, Matx<_Tp, n, nb>& dst ); + + Mat u, w, vt; +}; + +/** @brief Random Number Generator + +Random number generator. It encapsulates the state (currently, a 64-bit +integer) and has methods to return scalar random values and to fill +arrays with random values. Currently it supports uniform and Gaussian +(normal) distributions. The generator uses Multiply-With-Carry +algorithm, introduced by G. Marsaglia ( + ). +Gaussian-distribution random numbers are generated using the Ziggurat +algorithm ( ), +introduced by G. Marsaglia and W. W. Tsang. +*/ +class CV_EXPORTS RNG +{ +public: + enum { UNIFORM = 0, + NORMAL = 1 + }; + + /** @brief constructor + + These are the RNG constructors. The first form sets the state to some + pre-defined value, equal to 2\*\*32-1 in the current implementation. The + second form sets the state to the specified value. If you passed state=0 + , the constructor uses the above default value instead to avoid the + singular random number sequence, consisting of all zeros. + */ + RNG(); + /** @overload + @param state 64-bit value used to initialize the RNG. + */ + RNG(uint64 state); + /**The method updates the state using the MWC algorithm and returns the + next 32-bit random number.*/ + unsigned next(); + + /**Each of the methods updates the state using the MWC algorithm and + returns the next random number of the specified type. In case of integer + types, the returned number is from the available value range for the + specified type. In case of floating-point types, the returned value is + from [0,1) range. + */ + operator uchar(); + /** @overload */ + operator schar(); + /** @overload */ + operator ushort(); + /** @overload */ + operator short(); + /** @overload */ + operator unsigned(); + /** @overload */ + operator int(); + /** @overload */ + operator float(); + /** @overload */ + operator double(); + + /** @brief returns a random integer sampled uniformly from [0, N). + + The methods transform the state using the MWC algorithm and return the + next random number. The first form is equivalent to RNG::next . The + second form returns the random number modulo N , which means that the + result is in the range [0, N) . + */ + unsigned operator ()(); + /** @overload + @param N upper non-inclusive boundary of the returned random number. + */ + unsigned operator ()(unsigned N); + + /** @brief returns uniformly distributed integer random number from [a,b) range + + The methods transform the state using the MWC algorithm and return the + next uniformly-distributed random number of the specified type, deduced + from the input parameter type, from the range [a, b) . There is a nuance + illustrated by the following sample: + + @code{.cpp} + RNG rng; + + // always produces 0 + double a = rng.uniform(0, 1); + + // produces double from [0, 1) + double a1 = rng.uniform((double)0, (double)1); + + // produces float from [0, 1) + float b = rng.uniform(0.f, 1.f); + + // produces double from [0, 1) + double c = rng.uniform(0., 1.); + + // may cause compiler error because of ambiguity: + // RNG::uniform(0, (int)0.999999)? or RNG::uniform((double)0, 0.99999)? + double d = rng.uniform(0, 0.999999); + @endcode + + The compiler does not take into account the type of the variable to + which you assign the result of RNG::uniform . The only thing that + matters to the compiler is the type of a and b parameters. So, if you + want a floating-point random number, but the range boundaries are + integer numbers, either put dots in the end, if they are constants, or + use explicit type cast operators, as in the a1 initialization above. + @param a lower inclusive boundary of the returned random number. + @param b upper non-inclusive boundary of the returned random number. + */ + int uniform(int a, int b); + /** @overload */ + float uniform(float a, float b); + /** @overload */ + double uniform(double a, double b); + + /** @brief Fills arrays with random numbers. + + @param mat 2D or N-dimensional matrix; currently matrices with more than + 4 channels are not supported by the methods, use Mat::reshape as a + possible workaround. + @param distType distribution type, RNG::UNIFORM or RNG::NORMAL. + @param a first distribution parameter; in case of the uniform + distribution, this is an inclusive lower boundary, in case of the normal + distribution, this is a mean value. + @param b second distribution parameter; in case of the uniform + distribution, this is a non-inclusive upper boundary, in case of the + normal distribution, this is a standard deviation (diagonal of the + standard deviation matrix or the full standard deviation matrix). + @param saturateRange pre-saturation flag; for uniform distribution only; + if true, the method will first convert a and b to the acceptable value + range (according to the mat datatype) and then will generate uniformly + distributed random numbers within the range [saturate(a), saturate(b)), + if saturateRange=false, the method will generate uniformly distributed + random numbers in the original range [a, b) and then will saturate them, + it means, for example, that + theRNG().fill(mat_8u, RNG::UNIFORM, -DBL_MAX, DBL_MAX) will likely + produce array mostly filled with 0's and 255's, since the range (0, 255) + is significantly smaller than [-DBL_MAX, DBL_MAX). + + Each of the methods fills the matrix with the random values from the + specified distribution. As the new numbers are generated, the RNG state + is updated accordingly. In case of multiple-channel images, every + channel is filled independently, which means that RNG cannot generate + samples from the multi-dimensional Gaussian distribution with + non-diagonal covariance matrix directly. To do that, the method + generates samples from multi-dimensional standard Gaussian distribution + with zero mean and identity covariation matrix, and then transforms them + using transform to get samples from the specified Gaussian distribution. + */ + void fill( InputOutputArray mat, int distType, InputArray a, InputArray b, bool saturateRange = false ); + + /** @brief Returns the next random number sampled from the Gaussian distribution + @param sigma standard deviation of the distribution. + + The method transforms the state using the MWC algorithm and returns the + next random number from the Gaussian distribution N(0,sigma) . That is, + the mean value of the returned random numbers is zero and the standard + deviation is the specified sigma . + */ + double gaussian(double sigma); + + uint64 state; + + bool operator ==(const RNG& other) const; +}; + +/** @brief Mersenne Twister random number generator + +Inspired by http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/MT2002/CODES/mt19937ar.c +@todo document +*/ +class CV_EXPORTS RNG_MT19937 +{ +public: + RNG_MT19937(); + RNG_MT19937(unsigned s); + void seed(unsigned s); + + unsigned next(); + + operator int(); + operator unsigned(); + operator float(); + operator double(); + + unsigned operator ()(unsigned N); + unsigned operator ()(); + + /** @brief returns uniformly distributed integer random number from [a,b) range*/ + int uniform(int a, int b); + /** @brief returns uniformly distributed floating-point random number from [a,b) range*/ + float uniform(float a, float b); + /** @brief returns uniformly distributed double-precision floating-point random number from [a,b) range*/ + double uniform(double a, double b); + +private: + enum PeriodParameters {N = 624, M = 397}; + unsigned state[N]; + int mti; +}; + +//! @} core_array + +//! @addtogroup core_cluster +//! @{ + +/** @example samples/cpp/kmeans.cpp +An example on K-means clustering +*/ + +/** @brief Finds centers of clusters and groups input samples around the clusters. + +The function kmeans implements a k-means algorithm that finds the centers of cluster_count clusters +and groups the input samples around the clusters. As an output, \f$\texttt{bestLabels}_i\f$ contains a +0-based cluster index for the sample stored in the \f$i^{th}\f$ row of the samples matrix. + +@note +- (Python) An example on K-means clustering can be found at + opencv_source_code/samples/python/kmeans.py +@param data Data for clustering. An array of N-Dimensional points with float coordinates is needed. +Examples of this array can be: +- Mat points(count, 2, CV_32F); +- Mat points(count, 1, CV_32FC2); +- Mat points(1, count, CV_32FC2); +- std::vector\ points(sampleCount); +@param K Number of clusters to split the set by. +@param bestLabels Input/output integer array that stores the cluster indices for every sample. +@param criteria The algorithm termination criteria, that is, the maximum number of iterations and/or +the desired accuracy. The accuracy is specified as criteria.epsilon. As soon as each of the cluster +centers moves by less than criteria.epsilon on some iteration, the algorithm stops. +@param attempts Flag to specify the number of times the algorithm is executed using different +initial labellings. The algorithm returns the labels that yield the best compactness (see the last +function parameter). +@param flags Flag that can take values of cv::KmeansFlags +@param centers Output matrix of the cluster centers, one row per each cluster center. +@return The function returns the compactness measure that is computed as +\f[\sum _i \| \texttt{samples} _i - \texttt{centers} _{ \texttt{labels} _i} \| ^2\f] +after every attempt. The best (minimum) value is chosen and the corresponding labels and the +compactness value are returned by the function. Basically, you can use only the core of the +function, set the number of attempts to 1, initialize labels each time using a custom algorithm, +pass them with the ( flags = #KMEANS_USE_INITIAL_LABELS ) flag, and then choose the best +(most-compact) clustering. +*/ +CV_EXPORTS_W double kmeans( InputArray data, int K, InputOutputArray bestLabels, + TermCriteria criteria, int attempts, + int flags, OutputArray centers = noArray() ); + +//! @} core_cluster + +//! @addtogroup core_basic +//! @{ + +/////////////////////////////// Formatted output of cv::Mat /////////////////////////// + +/** @todo document */ +class CV_EXPORTS Formatted +{ +public: + virtual const char* next() = 0; + virtual void reset() = 0; + virtual ~Formatted(); +}; + +/** @todo document */ +class CV_EXPORTS Formatter +{ +public: + enum FormatType { + FMT_DEFAULT = 0, + FMT_MATLAB = 1, + FMT_CSV = 2, + FMT_PYTHON = 3, + FMT_NUMPY = 4, + FMT_C = 5 + }; + + virtual ~Formatter(); + + virtual Ptr format(const Mat& mtx) const = 0; + + virtual void set16fPrecision(int p = 4) = 0; + virtual void set32fPrecision(int p = 8) = 0; + virtual void set64fPrecision(int p = 16) = 0; + virtual void setMultiline(bool ml = true) = 0; + + static Ptr get(Formatter::FormatType fmt = FMT_DEFAULT); + +}; + +static inline +String& operator << (String& out, Ptr fmtd) +{ + fmtd->reset(); + for(const char* str = fmtd->next(); str; str = fmtd->next()) + out += cv::String(str); + return out; +} + +static inline +String& operator << (String& out, const Mat& mtx) +{ + return out << Formatter::get()->format(mtx); +} + +//////////////////////////////////////// Algorithm //////////////////////////////////// + +class CV_EXPORTS Algorithm; + +template struct ParamType {}; + + +/** @brief This is a base class for all more or less complex algorithms in OpenCV + +especially for classes of algorithms, for which there can be multiple implementations. The examples +are stereo correspondence (for which there are algorithms like block matching, semi-global block +matching, graph-cut etc.), background subtraction (which can be done using mixture-of-gaussians +models, codebook-based algorithm etc.), optical flow (block matching, Lucas-Kanade, Horn-Schunck +etc.). + +Here is example of SimpleBlobDetector use in your application via Algorithm interface: +@snippet snippets/core_various.cpp Algorithm +*/ +class CV_EXPORTS_W Algorithm +{ +public: + Algorithm(); + virtual ~Algorithm(); + + /** @brief Clears the algorithm state + */ + CV_WRAP virtual void clear() {} + + /** @brief Stores algorithm parameters in a file storage + */ + virtual void write(FileStorage& fs) const { CV_UNUSED(fs); } + + /** @brief simplified API for language bindings + * @overload + */ + CV_WRAP void write(const Ptr& fs, const String& name = String()) const; + + /** @brief Reads algorithm parameters from a file storage + */ + CV_WRAP virtual void read(const FileNode& fn) { CV_UNUSED(fn); } + + /** @brief Returns true if the Algorithm is empty (e.g. in the very beginning or after unsuccessful read + */ + CV_WRAP virtual bool empty() const { return false; } + + /** @brief Reads algorithm from the file node + + This is static template method of Algorithm. It's usage is following (in the case of SVM): + @code + cv::FileStorage fsRead("example.xml", FileStorage::READ); + Ptr svm = Algorithm::read(fsRead.root()); + @endcode + In order to make this method work, the derived class must overwrite Algorithm::read(const + FileNode& fn) and also have static create() method without parameters + (or with all the optional parameters) + */ + template static Ptr<_Tp> read(const FileNode& fn) + { + Ptr<_Tp> obj = _Tp::create(); + obj->read(fn); + return !obj->empty() ? obj : Ptr<_Tp>(); + } + + /** @brief Loads algorithm from the file + + @param filename Name of the file to read. + @param objname The optional name of the node to read (if empty, the first top-level node will be used) + + This is static template method of Algorithm. It's usage is following (in the case of SVM): + @code + Ptr svm = Algorithm::load("my_svm_model.xml"); + @endcode + In order to make this method work, the derived class must overwrite Algorithm::read(const + FileNode& fn). + */ + template static Ptr<_Tp> load(const String& filename, const String& objname=String()) + { + FileStorage fs(filename, FileStorage::READ); + CV_Assert(fs.isOpened()); + FileNode fn = objname.empty() ? fs.getFirstTopLevelNode() : fs[objname]; + if (fn.empty()) return Ptr<_Tp>(); + Ptr<_Tp> obj = _Tp::create(); + obj->read(fn); + return !obj->empty() ? obj : Ptr<_Tp>(); + } + + /** @brief Loads algorithm from a String + + @param strModel The string variable containing the model you want to load. + @param objname The optional name of the node to read (if empty, the first top-level node will be used) + + This is static template method of Algorithm. It's usage is following (in the case of SVM): + @code + Ptr svm = Algorithm::loadFromString(myStringModel); + @endcode + */ + template static Ptr<_Tp> loadFromString(const String& strModel, const String& objname=String()) + { + FileStorage fs(strModel, FileStorage::READ + FileStorage::MEMORY); + FileNode fn = objname.empty() ? fs.getFirstTopLevelNode() : fs[objname]; + Ptr<_Tp> obj = _Tp::create(); + obj->read(fn); + return !obj->empty() ? obj : Ptr<_Tp>(); + } + + /** Saves the algorithm to a file. + In order to make this method work, the derived class must implement Algorithm::write(FileStorage& fs). */ + CV_WRAP virtual void save(const String& filename) const; + + /** Returns the algorithm string identifier. + This string is used as top level xml/yml node tag when the object is saved to a file or string. */ + CV_WRAP virtual String getDefaultName() const; + +protected: + void writeFormat(FileStorage& fs) const; +}; + +enum struct Param { + INT=0, BOOLEAN=1, REAL=2, STRING=3, MAT=4, MAT_VECTOR=5, ALGORITHM=6, FLOAT=7, + UNSIGNED_INT=8, UINT64=9, UCHAR=11, SCALAR=12 +}; + + + +template<> struct ParamType +{ + typedef bool const_param_type; + typedef bool member_type; + + static const Param type = Param::BOOLEAN; +}; + +template<> struct ParamType +{ + typedef int const_param_type; + typedef int member_type; + + static const Param type = Param::INT; +}; + +template<> struct ParamType +{ + typedef double const_param_type; + typedef double member_type; + + static const Param type = Param::REAL; +}; + +template<> struct ParamType +{ + typedef const String& const_param_type; + typedef String member_type; + + static const Param type = Param::STRING; +}; + +template<> struct ParamType +{ + typedef const Mat& const_param_type; + typedef Mat member_type; + + static const Param type = Param::MAT; +}; + +template<> struct ParamType > +{ + typedef const std::vector& const_param_type; + typedef std::vector member_type; + + static const Param type = Param::MAT_VECTOR; +}; + +template<> struct ParamType +{ + typedef const Ptr& const_param_type; + typedef Ptr member_type; + + static const Param type = Param::ALGORITHM; +}; + +template<> struct ParamType +{ + typedef float const_param_type; + typedef float member_type; + + static const Param type = Param::FLOAT; +}; + +template<> struct ParamType +{ + typedef unsigned const_param_type; + typedef unsigned member_type; + + static const Param type = Param::UNSIGNED_INT; +}; + +template<> struct ParamType +{ + typedef uint64 const_param_type; + typedef uint64 member_type; + + static const Param type = Param::UINT64; +}; + +template<> struct ParamType +{ + typedef uchar const_param_type; + typedef uchar member_type; + + static const Param type = Param::UCHAR; +}; + +template<> struct ParamType +{ + typedef const Scalar& const_param_type; + typedef Scalar member_type; + + static const Param type = Param::SCALAR; +}; + +template +struct ParamType<_Tp, typename std::enable_if< std::is_enum<_Tp>::value >::type> +{ + typedef typename std::underlying_type<_Tp>::type const_param_type; + typedef typename std::underlying_type<_Tp>::type member_type; + + static const Param type = Param::INT; +}; + +//! @} core_basic + +} //namespace cv + +#include "opencv2/core/operations.hpp" +#include "opencv2/core/cvstd.inl.hpp" +#include "opencv2/core/utility.hpp" +#include "opencv2/core/optim.hpp" +#include "opencv2/core/ovx.hpp" + +#endif /*OPENCV_CORE_HPP*/ diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/affine.hpp b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/affine.hpp new file mode 100644 index 0000000..1806382 --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/affine.hpp @@ -0,0 +1,678 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Copyright (C) 2013, OpenCV Foundation, all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef OPENCV_CORE_AFFINE3_HPP +#define OPENCV_CORE_AFFINE3_HPP + +#ifdef __cplusplus + +#include + +namespace cv +{ + +//! @addtogroup core +//! @{ + + /** @brief Affine transform + * + * It represents a 4x4 homogeneous transformation matrix \f$T\f$ + * + * \f[T = + * \begin{bmatrix} + * R & t\\ + * 0 & 1\\ + * \end{bmatrix} + * \f] + * + * where \f$R\f$ is a 3x3 rotation matrix and \f$t\f$ is a 3x1 translation vector. + * + * You can specify \f$R\f$ either by a 3x3 rotation matrix or by a 3x1 rotation vector, + * which is converted to a 3x3 rotation matrix by the Rodrigues formula. + * + * To construct a matrix \f$T\f$ representing first rotation around the axis \f$r\f$ with rotation + * angle \f$|r|\f$ in radian (right hand rule) and then translation by the vector \f$t\f$, you can use + * + * @code + * cv::Vec3f r, t; + * cv::Affine3f T(r, t); + * @endcode + * + * If you already have the rotation matrix \f$R\f$, then you can use + * + * @code + * cv::Matx33f R; + * cv::Affine3f T(R, t); + * @endcode + * + * To extract the rotation matrix \f$R\f$ from \f$T\f$, use + * + * @code + * cv::Matx33f R = T.rotation(); + * @endcode + * + * To extract the translation vector \f$t\f$ from \f$T\f$, use + * + * @code + * cv::Vec3f t = T.translation(); + * @endcode + * + * To extract the rotation vector \f$r\f$ from \f$T\f$, use + * + * @code + * cv::Vec3f r = T.rvec(); + * @endcode + * + * Note that since the mapping from rotation vectors to rotation matrices + * is many to one. The returned rotation vector is not necessarily the one + * you used before to set the matrix. + * + * If you have two transformations \f$T = T_1 * T_2\f$, use + * + * @code + * cv::Affine3f T, T1, T2; + * T = T2.concatenate(T1); + * @endcode + * + * To get the inverse transform of \f$T\f$, use + * + * @code + * cv::Affine3f T, T_inv; + * T_inv = T.inv(); + * @endcode + * + */ + template + class Affine3 + { + public: + typedef T float_type; + typedef Matx Mat3; + typedef Matx Mat4; + typedef Vec Vec3; + + //! Default constructor. It represents a 4x4 identity matrix. + Affine3(); + + //! Augmented affine matrix + Affine3(const Mat4& affine); + + /** + * The resulting 4x4 matrix is + * + * \f[ + * \begin{bmatrix} + * R & t\\ + * 0 & 1\\ + * \end{bmatrix} + * \f] + * + * @param R 3x3 rotation matrix. + * @param t 3x1 translation vector. + */ + Affine3(const Mat3& R, const Vec3& t = Vec3::all(0)); + + /** + * Rodrigues vector. + * + * The last row of the current matrix is set to [0,0,0,1]. + * + * @param rvec 3x1 rotation vector. Its direction indicates the rotation axis and its length + * indicates the rotation angle in radian (using right hand rule). + * @param t 3x1 translation vector. + */ + Affine3(const Vec3& rvec, const Vec3& t = Vec3::all(0)); + + /** + * Combines all constructors above. Supports 4x4, 3x4, 3x3, 1x3, 3x1 sizes of data matrix. + * + * The last row of the current matrix is set to [0,0,0,1] when data is not 4x4. + * + * @param data 1-channel matrix. + * when it is 4x4, it is copied to the current matrix and t is not used. + * When it is 3x4, it is copied to the upper part 3x4 of the current matrix and t is not used. + * When it is 3x3, it is copied to the upper left 3x3 part of the current matrix. + * When it is 3x1 or 1x3, it is treated as a rotation vector and the Rodrigues formula is used + * to compute a 3x3 rotation matrix. + * @param t 3x1 translation vector. It is used only when data is neither 4x4 nor 3x4. + */ + explicit Affine3(const Mat& data, const Vec3& t = Vec3::all(0)); + + //! From 16-element array + explicit Affine3(const float_type* vals); + + //! Create an 4x4 identity transform + static Affine3 Identity(); + + /** + * Rotation matrix. + * + * Copy the rotation matrix to the upper left 3x3 part of the current matrix. + * The remaining elements of the current matrix are not changed. + * + * @param R 3x3 rotation matrix. + * + */ + void rotation(const Mat3& R); + + /** + * Rodrigues vector. + * + * It sets the upper left 3x3 part of the matrix. The remaining part is unaffected. + * + * @param rvec 3x1 rotation vector. The direction indicates the rotation axis and + * its length indicates the rotation angle in radian (using the right thumb convention). + */ + void rotation(const Vec3& rvec); + + /** + * Combines rotation methods above. Supports 3x3, 1x3, 3x1 sizes of data matrix. + * + * It sets the upper left 3x3 part of the matrix. The remaining part is unaffected. + * + * @param data 1-channel matrix. + * When it is a 3x3 matrix, it sets the upper left 3x3 part of the current matrix. + * When it is a 1x3 or 3x1 matrix, it is used as a rotation vector. The Rodrigues formula + * is used to compute the rotation matrix and sets the upper left 3x3 part of the current matrix. + */ + void rotation(const Mat& data); + + /** + * Copy the 3x3 matrix L to the upper left part of the current matrix + * + * It sets the upper left 3x3 part of the matrix. The remaining part is unaffected. + * + * @param L 3x3 matrix. + */ + void linear(const Mat3& L); + + /** + * Copy t to the first three elements of the last column of the current matrix + * + * It sets the upper right 3x1 part of the matrix. The remaining part is unaffected. + * + * @param t 3x1 translation vector. + */ + void translation(const Vec3& t); + + //! @return the upper left 3x3 part + Mat3 rotation() const; + + //! @return the upper left 3x3 part + Mat3 linear() const; + + //! @return the upper right 3x1 part + Vec3 translation() const; + + //! Rodrigues vector. + //! @return a vector representing the upper left 3x3 rotation matrix of the current matrix. + //! @warning Since the mapping between rotation vectors and rotation matrices is many to one, + //! this function returns only one rotation vector that represents the current rotation matrix, + //! which is not necessarily the same one set by `rotation(const Vec3& rvec)`. + Vec3 rvec() const; + + //! @return the inverse of the current matrix. + Affine3 inv(int method = cv::DECOMP_SVD) const; + + //! a.rotate(R) is equivalent to Affine(R, 0) * a; + Affine3 rotate(const Mat3& R) const; + + //! a.rotate(rvec) is equivalent to Affine(rvec, 0) * a; + Affine3 rotate(const Vec3& rvec) const; + + //! a.translate(t) is equivalent to Affine(E, t) * a, where E is an identity matrix + Affine3 translate(const Vec3& t) const; + + //! a.concatenate(affine) is equivalent to affine * a; + Affine3 concatenate(const Affine3& affine) const; + + template operator Affine3() const; + + template Affine3 cast() const; + + Mat4 matrix; + +#if defined EIGEN_WORLD_VERSION && defined EIGEN_GEOMETRY_MODULE_H + Affine3(const Eigen::Transform& affine); + Affine3(const Eigen::Transform& affine); + operator Eigen::Transform() const; + operator Eigen::Transform() const; +#endif + }; + + template static + Affine3 operator*(const Affine3& affine1, const Affine3& affine2); + + //! V is a 3-element vector with member fields x, y and z + template static + V operator*(const Affine3& affine, const V& vector); + + typedef Affine3 Affine3f; + typedef Affine3 Affine3d; + + static Vec3f operator*(const Affine3f& affine, const Vec3f& vector); + static Vec3d operator*(const Affine3d& affine, const Vec3d& vector); + + template class DataType< Affine3<_Tp> > + { + public: + typedef Affine3<_Tp> value_type; + typedef Affine3::work_type> work_type; + typedef _Tp channel_type; + + enum { generic_type = 0, + channels = 16, + fmt = traits::SafeFmt::fmt + ((channels - 1) << 8) +#ifdef OPENCV_TRAITS_ENABLE_DEPRECATED + ,depth = DataType::depth + ,type = CV_MAKETYPE(depth, channels) +#endif + }; + + typedef Vec vec_type; + }; + + namespace traits { + template + struct Depth< Affine3<_Tp> > { enum { value = Depth<_Tp>::value }; }; + template + struct Type< Affine3<_Tp> > { enum { value = CV_MAKETYPE(Depth<_Tp>::value, 16) }; }; + } // namespace + +//! @} core + +} + +//! @cond IGNORED + +/////////////////////////////////////////////////////////////////////////////////// +// Implementation + +template inline +cv::Affine3::Affine3() + : matrix(Mat4::eye()) +{} + +template inline +cv::Affine3::Affine3(const Mat4& affine) + : matrix(affine) +{} + +template inline +cv::Affine3::Affine3(const Mat3& R, const Vec3& t) +{ + rotation(R); + translation(t); + matrix.val[12] = matrix.val[13] = matrix.val[14] = 0; + matrix.val[15] = 1; +} + +template inline +cv::Affine3::Affine3(const Vec3& _rvec, const Vec3& t) +{ + rotation(_rvec); + translation(t); + matrix.val[12] = matrix.val[13] = matrix.val[14] = 0; + matrix.val[15] = 1; +} + +template inline +cv::Affine3::Affine3(const cv::Mat& data, const Vec3& t) +{ + CV_Assert(data.type() == cv::traits::Type::value); + CV_Assert(data.channels() == 1); + + if (data.cols == 4 && data.rows == 4) + { + data.copyTo(matrix); + return; + } + else if (data.cols == 4 && data.rows == 3) + { + rotation(data(Rect(0, 0, 3, 3))); + translation(data(Rect(3, 0, 1, 3))); + } + else + { + rotation(data); + translation(t); + } + + matrix.val[12] = matrix.val[13] = matrix.val[14] = 0; + matrix.val[15] = 1; +} + +template inline +cv::Affine3::Affine3(const float_type* vals) : matrix(vals) +{} + +template inline +cv::Affine3 cv::Affine3::Identity() +{ + return Affine3(cv::Affine3::Mat4::eye()); +} + +template inline +void cv::Affine3::rotation(const Mat3& R) +{ + linear(R); +} + +template inline +void cv::Affine3::rotation(const Vec3& _rvec) +{ + double theta = norm(_rvec); + + if (theta < DBL_EPSILON) + rotation(Mat3::eye()); + else + { + double c = std::cos(theta); + double s = std::sin(theta); + double c1 = 1. - c; + double itheta = (theta != 0) ? 1./theta : 0.; + + Point3_ r = _rvec*itheta; + + Mat3 rrt( r.x*r.x, r.x*r.y, r.x*r.z, r.x*r.y, r.y*r.y, r.y*r.z, r.x*r.z, r.y*r.z, r.z*r.z ); + Mat3 r_x( 0, -r.z, r.y, r.z, 0, -r.x, -r.y, r.x, 0 ); + + // R = cos(theta)*I + (1 - cos(theta))*r*rT + sin(theta)*[r_x] + // where [r_x] is [0 -rz ry; rz 0 -rx; -ry rx 0] + Mat3 R = c*Mat3::eye() + c1*rrt + s*r_x; + + rotation(R); + } +} + +//Combines rotation methods above. Supports 3x3, 1x3, 3x1 sizes of data matrix; +template inline +void cv::Affine3::rotation(const cv::Mat& data) +{ + CV_Assert(data.type() == cv::traits::Type::value); + CV_Assert(data.channels() == 1); + + if (data.cols == 3 && data.rows == 3) + { + Mat3 R; + data.copyTo(R); + rotation(R); + } + else if ((data.cols == 3 && data.rows == 1) || (data.cols == 1 && data.rows == 3)) + { + Vec3 _rvec; + data.reshape(1, 3).copyTo(_rvec); + rotation(_rvec); + } + else + CV_Error(Error::StsError, "Input matrix can only be 3x3, 1x3 or 3x1"); +} + +template inline +void cv::Affine3::linear(const Mat3& L) +{ + matrix.val[0] = L.val[0]; matrix.val[1] = L.val[1]; matrix.val[ 2] = L.val[2]; + matrix.val[4] = L.val[3]; matrix.val[5] = L.val[4]; matrix.val[ 6] = L.val[5]; + matrix.val[8] = L.val[6]; matrix.val[9] = L.val[7]; matrix.val[10] = L.val[8]; +} + +template inline +void cv::Affine3::translation(const Vec3& t) +{ + matrix.val[3] = t[0]; matrix.val[7] = t[1]; matrix.val[11] = t[2]; +} + +template inline +typename cv::Affine3::Mat3 cv::Affine3::rotation() const +{ + return linear(); +} + +template inline +typename cv::Affine3::Mat3 cv::Affine3::linear() const +{ + typename cv::Affine3::Mat3 R; + R.val[0] = matrix.val[0]; R.val[1] = matrix.val[1]; R.val[2] = matrix.val[ 2]; + R.val[3] = matrix.val[4]; R.val[4] = matrix.val[5]; R.val[5] = matrix.val[ 6]; + R.val[6] = matrix.val[8]; R.val[7] = matrix.val[9]; R.val[8] = matrix.val[10]; + return R; +} + +template inline +typename cv::Affine3::Vec3 cv::Affine3::translation() const +{ + return Vec3(matrix.val[3], matrix.val[7], matrix.val[11]); +} + +template inline +typename cv::Affine3::Vec3 cv::Affine3::rvec() const +{ + cv::Vec3d w; + cv::Matx33d u, vt, R = rotation(); + cv::SVD::compute(R, w, u, vt, cv::SVD::FULL_UV + cv::SVD::MODIFY_A); + R = u * vt; + + double rx = R.val[7] - R.val[5]; + double ry = R.val[2] - R.val[6]; + double rz = R.val[3] - R.val[1]; + + double s = std::sqrt((rx*rx + ry*ry + rz*rz)*0.25); + double c = (R.val[0] + R.val[4] + R.val[8] - 1) * 0.5; + c = c > 1.0 ? 1.0 : c < -1.0 ? -1.0 : c; + double theta = std::acos(c); + + if( s < 1e-5 ) + { + if( c > 0 ) + rx = ry = rz = 0; + else + { + double t; + t = (R.val[0] + 1) * 0.5; + rx = std::sqrt(std::max(t, 0.0)); + t = (R.val[4] + 1) * 0.5; + ry = std::sqrt(std::max(t, 0.0)) * (R.val[1] < 0 ? -1.0 : 1.0); + t = (R.val[8] + 1) * 0.5; + rz = std::sqrt(std::max(t, 0.0)) * (R.val[2] < 0 ? -1.0 : 1.0); + + if( fabs(rx) < fabs(ry) && fabs(rx) < fabs(rz) && (R.val[5] > 0) != (ry*rz > 0) ) + rz = -rz; + theta /= std::sqrt(rx*rx + ry*ry + rz*rz); + rx *= theta; + ry *= theta; + rz *= theta; + } + } + else + { + double vth = 1/(2*s); + vth *= theta; + rx *= vth; ry *= vth; rz *= vth; + } + + return cv::Vec3d(rx, ry, rz); +} + +template inline +cv::Affine3 cv::Affine3::inv(int method) const +{ + return matrix.inv(method); +} + +template inline +cv::Affine3 cv::Affine3::rotate(const Mat3& R) const +{ + Mat3 Lc = linear(); + Vec3 tc = translation(); + Mat4 result; + result.val[12] = result.val[13] = result.val[14] = 0; + result.val[15] = 1; + + for(int j = 0; j < 3; ++j) + { + for(int i = 0; i < 3; ++i) + { + float_type value = 0; + for(int k = 0; k < 3; ++k) + value += R(j, k) * Lc(k, i); + result(j, i) = value; + } + + result(j, 3) = R.row(j).dot(tc.t()); + } + return result; +} + +template inline +cv::Affine3 cv::Affine3::rotate(const Vec3& _rvec) const +{ + return rotate(Affine3f(_rvec).rotation()); +} + +template inline +cv::Affine3 cv::Affine3::translate(const Vec3& t) const +{ + Mat4 m = matrix; + m.val[ 3] += t[0]; + m.val[ 7] += t[1]; + m.val[11] += t[2]; + return m; +} + +template inline +cv::Affine3 cv::Affine3::concatenate(const Affine3& affine) const +{ + return (*this).rotate(affine.rotation()).translate(affine.translation()); +} + +template template inline +cv::Affine3::operator Affine3() const +{ + return Affine3(matrix); +} + +template template inline +cv::Affine3 cv::Affine3::cast() const +{ + return Affine3(matrix); +} + +template inline +cv::Affine3 cv::operator*(const cv::Affine3& affine1, const cv::Affine3& affine2) +{ + return affine2.concatenate(affine1); +} + +template inline +V cv::operator*(const cv::Affine3& affine, const V& v) +{ + const typename Affine3::Mat4& m = affine.matrix; + + V r; + r.x = m.val[0] * v.x + m.val[1] * v.y + m.val[ 2] * v.z + m.val[ 3]; + r.y = m.val[4] * v.x + m.val[5] * v.y + m.val[ 6] * v.z + m.val[ 7]; + r.z = m.val[8] * v.x + m.val[9] * v.y + m.val[10] * v.z + m.val[11]; + return r; +} + +static inline +cv::Vec3f cv::operator*(const cv::Affine3f& affine, const cv::Vec3f& v) +{ + const cv::Matx44f& m = affine.matrix; + cv::Vec3f r; + r.val[0] = m.val[0] * v[0] + m.val[1] * v[1] + m.val[ 2] * v[2] + m.val[ 3]; + r.val[1] = m.val[4] * v[0] + m.val[5] * v[1] + m.val[ 6] * v[2] + m.val[ 7]; + r.val[2] = m.val[8] * v[0] + m.val[9] * v[1] + m.val[10] * v[2] + m.val[11]; + return r; +} + +static inline +cv::Vec3d cv::operator*(const cv::Affine3d& affine, const cv::Vec3d& v) +{ + const cv::Matx44d& m = affine.matrix; + cv::Vec3d r; + r.val[0] = m.val[0] * v[0] + m.val[1] * v[1] + m.val[ 2] * v[2] + m.val[ 3]; + r.val[1] = m.val[4] * v[0] + m.val[5] * v[1] + m.val[ 6] * v[2] + m.val[ 7]; + r.val[2] = m.val[8] * v[0] + m.val[9] * v[1] + m.val[10] * v[2] + m.val[11]; + return r; +} + + + +#if defined EIGEN_WORLD_VERSION && defined EIGEN_GEOMETRY_MODULE_H + +template inline +cv::Affine3::Affine3(const Eigen::Transform& affine) +{ + cv::Mat(4, 4, cv::traits::Type::value, affine.matrix().data()).copyTo(matrix); +} + +template inline +cv::Affine3::Affine3(const Eigen::Transform& affine) +{ + Eigen::Transform a = affine; + cv::Mat(4, 4, cv::traits::Type::value, a.matrix().data()).copyTo(matrix); +} + +template inline +cv::Affine3::operator Eigen::Transform() const +{ + Eigen::Transform r; + cv::Mat hdr(4, 4, cv::traits::Type::value, r.matrix().data()); + cv::Mat(matrix, false).copyTo(hdr); + return r; +} + +template inline +cv::Affine3::operator Eigen::Transform() const +{ + return this->operator Eigen::Transform(); +} + +#endif /* defined EIGEN_WORLD_VERSION && defined EIGEN_GEOMETRY_MODULE_H */ + +//! @endcond + +#endif /* __cplusplus */ + +#endif /* OPENCV_CORE_AFFINE3_HPP */ diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/async.hpp b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/async.hpp new file mode 100644 index 0000000..54560c7 --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/async.hpp @@ -0,0 +1,105 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#ifndef OPENCV_CORE_ASYNC_HPP +#define OPENCV_CORE_ASYNC_HPP + +#include + +#ifdef CV_CXX11 +//#include +#include +#endif + +namespace cv { + +/** @addtogroup core_async + +@{ +*/ + + +/** @brief Returns result of asynchronous operations + +Object has attached asynchronous state. +Assignment operator doesn't clone asynchronous state (it is shared between all instances). + +Result can be fetched via get() method only once. + +*/ +class CV_EXPORTS_W AsyncArray +{ +public: + ~AsyncArray() CV_NOEXCEPT; + CV_WRAP AsyncArray() CV_NOEXCEPT; + AsyncArray(const AsyncArray& o) CV_NOEXCEPT; + AsyncArray& operator=(const AsyncArray& o) CV_NOEXCEPT; + CV_WRAP void release() CV_NOEXCEPT; + + /** Fetch the result. + @param[out] dst destination array + + Waits for result until container has valid result. + Throws exception if exception was stored as a result. + + Throws exception on invalid container state. + + @note Result or stored exception can be fetched only once. + */ + CV_WRAP void get(OutputArray dst) const; + + /** Retrieving the result with timeout + @param[out] dst destination array + @param[in] timeoutNs timeout in nanoseconds, -1 for infinite wait + + @returns true if result is ready, false if the timeout has expired + + @note Result or stored exception can be fetched only once. + */ + bool get(OutputArray dst, int64 timeoutNs) const; + + CV_WRAP inline + bool get(OutputArray dst, double timeoutNs) const { return get(dst, (int64)timeoutNs); } + + bool wait_for(int64 timeoutNs) const; + + CV_WRAP inline + bool wait_for(double timeoutNs) const { return wait_for((int64)timeoutNs); } + + CV_WRAP bool valid() const CV_NOEXCEPT; + +#ifdef CV_CXX11 + inline AsyncArray(AsyncArray&& o) { p = o.p; o.p = NULL; } + inline AsyncArray& operator=(AsyncArray&& o) CV_NOEXCEPT { std::swap(p, o.p); return *this; } + + template + inline bool get(OutputArray dst, const std::chrono::duration<_Rep, _Period>& timeout) + { + return get(dst, (int64)(std::chrono::nanoseconds(timeout).count())); + } + + template + inline bool wait_for(const std::chrono::duration<_Rep, _Period>& timeout) + { + return wait_for((int64)(std::chrono::nanoseconds(timeout).count())); + } + +#if 0 + std::future getFutureMat() const; + std::future getFutureUMat() const; +#endif +#endif + + + // PImpl + struct Impl; friend struct Impl; + inline void* _getImpl() const CV_NOEXCEPT { return p; } +protected: + Impl* p; +}; + + +//! @} +} // namespace +#endif // OPENCV_CORE_ASYNC_HPP diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/base.hpp b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/base.hpp new file mode 100644 index 0000000..a3a3e51 --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/base.hpp @@ -0,0 +1,654 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Copyright (C) 2013, OpenCV Foundation, all rights reserved. +// Copyright (C) 2014, Itseez Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef OPENCV_CORE_BASE_HPP +#define OPENCV_CORE_BASE_HPP + +#ifndef __cplusplus +# error base.hpp header must be compiled as C++ +#endif + +#include "opencv2/opencv_modules.hpp" + +#include +#include + +#include "opencv2/core/cvdef.h" +#include "opencv2/core/cvstd.hpp" + +namespace cv +{ + +//! @addtogroup core_utils +//! @{ + +namespace Error { +//! error codes +enum Code { + StsOk= 0, //!< everything is ok + StsBackTrace= -1, //!< pseudo error for back trace + StsError= -2, //!< unknown /unspecified error + StsInternal= -3, //!< internal error (bad state) + StsNoMem= -4, //!< insufficient memory + StsBadArg= -5, //!< function arg/param is bad + StsBadFunc= -6, //!< unsupported function + StsNoConv= -7, //!< iteration didn't converge + StsAutoTrace= -8, //!< tracing + HeaderIsNull= -9, //!< image header is NULL + BadImageSize= -10, //!< image size is invalid + BadOffset= -11, //!< offset is invalid + BadDataPtr= -12, //!< + BadStep= -13, //!< image step is wrong, this may happen for a non-continuous matrix. + BadModelOrChSeq= -14, //!< + BadNumChannels= -15, //!< bad number of channels, for example, some functions accept only single channel matrices. + BadNumChannel1U= -16, //!< + BadDepth= -17, //!< input image depth is not supported by the function + BadAlphaChannel= -18, //!< + BadOrder= -19, //!< number of dimensions is out of range + BadOrigin= -20, //!< incorrect input origin + BadAlign= -21, //!< incorrect input align + BadCallBack= -22, //!< + BadTileSize= -23, //!< + BadCOI= -24, //!< input COI is not supported + BadROISize= -25, //!< incorrect input roi + MaskIsTiled= -26, //!< + StsNullPtr= -27, //!< null pointer + StsVecLengthErr= -28, //!< incorrect vector length + StsFilterStructContentErr= -29, //!< incorrect filter structure content + StsKernelStructContentErr= -30, //!< incorrect transform kernel content + StsFilterOffsetErr= -31, //!< incorrect filter offset value + StsBadSize= -201, //!< the input/output structure size is incorrect + StsDivByZero= -202, //!< division by zero + StsInplaceNotSupported= -203, //!< in-place operation is not supported + StsObjectNotFound= -204, //!< request can't be completed + StsUnmatchedFormats= -205, //!< formats of input/output arrays differ + StsBadFlag= -206, //!< flag is wrong or not supported + StsBadPoint= -207, //!< bad CvPoint + StsBadMask= -208, //!< bad format of mask (neither 8uC1 nor 8sC1) + StsUnmatchedSizes= -209, //!< sizes of input/output structures do not match + StsUnsupportedFormat= -210, //!< the data format/type is not supported by the function + StsOutOfRange= -211, //!< some of parameters are out of range + StsParseError= -212, //!< invalid syntax/structure of the parsed file + StsNotImplemented= -213, //!< the requested function/feature is not implemented + StsBadMemBlock= -214, //!< an allocated block has been corrupted + StsAssert= -215, //!< assertion failed + GpuNotSupported= -216, //!< no CUDA support + GpuApiCallError= -217, //!< GPU API call error + OpenGlNotSupported= -218, //!< no OpenGL support + OpenGlApiCallError= -219, //!< OpenGL API call error + OpenCLApiCallError= -220, //!< OpenCL API call error + OpenCLDoubleNotSupported= -221, + OpenCLInitError= -222, //!< OpenCL initialization error + OpenCLNoAMDBlasFft= -223 +}; +} //Error + +//! @} core_utils + +//! @addtogroup core_array +//! @{ + +//! matrix decomposition types +enum DecompTypes { + /** Gaussian elimination with the optimal pivot element chosen. */ + DECOMP_LU = 0, + /** singular value decomposition (SVD) method; the system can be over-defined and/or the matrix + src1 can be singular */ + DECOMP_SVD = 1, + /** eigenvalue decomposition; the matrix src1 must be symmetrical */ + DECOMP_EIG = 2, + /** Cholesky \f$LL^T\f$ factorization; the matrix src1 must be symmetrical and positively + defined */ + DECOMP_CHOLESKY = 3, + /** QR factorization; the system can be over-defined and/or the matrix src1 can be singular */ + DECOMP_QR = 4, + /** while all the previous flags are mutually exclusive, this flag can be used together with + any of the previous; it means that the normal equations + \f$\texttt{src1}^T\cdot\texttt{src1}\cdot\texttt{dst}=\texttt{src1}^T\texttt{src2}\f$ are + solved instead of the original system + \f$\texttt{src1}\cdot\texttt{dst}=\texttt{src2}\f$ */ + DECOMP_NORMAL = 16 +}; + +/** norm types + +src1 and src2 denote input arrays. +*/ + +enum NormTypes { + /** + \f[ + norm = \forkthree + {\|\texttt{src1}\|_{L_{\infty}} = \max _I | \texttt{src1} (I)|}{if \(\texttt{normType} = \texttt{NORM_INF}\) } + {\|\texttt{src1}-\texttt{src2}\|_{L_{\infty}} = \max _I | \texttt{src1} (I) - \texttt{src2} (I)|}{if \(\texttt{normType} = \texttt{NORM_INF}\) } + {\frac{\|\texttt{src1}-\texttt{src2}\|_{L_{\infty}} }{\|\texttt{src2}\|_{L_{\infty}} }}{if \(\texttt{normType} = \texttt{NORM_RELATIVE | NORM_INF}\) } + \f] + */ + NORM_INF = 1, + /** + \f[ + norm = \forkthree + {\| \texttt{src1} \| _{L_1} = \sum _I | \texttt{src1} (I)|}{if \(\texttt{normType} = \texttt{NORM_L1}\)} + { \| \texttt{src1} - \texttt{src2} \| _{L_1} = \sum _I | \texttt{src1} (I) - \texttt{src2} (I)|}{if \(\texttt{normType} = \texttt{NORM_L1}\) } + { \frac{\|\texttt{src1}-\texttt{src2}\|_{L_1} }{\|\texttt{src2}\|_{L_1}} }{if \(\texttt{normType} = \texttt{NORM_RELATIVE | NORM_L1}\) } + \f]*/ + NORM_L1 = 2, + /** + \f[ + norm = \forkthree + { \| \texttt{src1} \| _{L_2} = \sqrt{\sum_I \texttt{src1}(I)^2} }{if \(\texttt{normType} = \texttt{NORM_L2}\) } + { \| \texttt{src1} - \texttt{src2} \| _{L_2} = \sqrt{\sum_I (\texttt{src1}(I) - \texttt{src2}(I))^2} }{if \(\texttt{normType} = \texttt{NORM_L2}\) } + { \frac{\|\texttt{src1}-\texttt{src2}\|_{L_2} }{\|\texttt{src2}\|_{L_2}} }{if \(\texttt{normType} = \texttt{NORM_RELATIVE | NORM_L2}\) } + \f] + */ + NORM_L2 = 4, + /** + \f[ + norm = \forkthree + { \| \texttt{src1} \| _{L_2} ^{2} = \sum_I \texttt{src1}(I)^2} {if \(\texttt{normType} = \texttt{NORM_L2SQR}\)} + { \| \texttt{src1} - \texttt{src2} \| _{L_2} ^{2} = \sum_I (\texttt{src1}(I) - \texttt{src2}(I))^2 }{if \(\texttt{normType} = \texttt{NORM_L2SQR}\) } + { \left(\frac{\|\texttt{src1}-\texttt{src2}\|_{L_2} }{\|\texttt{src2}\|_{L_2}}\right)^2 }{if \(\texttt{normType} = \texttt{NORM_RELATIVE | NORM_L2SQR}\) } + \f] + */ + NORM_L2SQR = 5, + /** + In the case of one input array, calculates the Hamming distance of the array from zero, + In the case of two input arrays, calculates the Hamming distance between the arrays. + */ + NORM_HAMMING = 6, + /** + Similar to NORM_HAMMING, but in the calculation, each two bits of the input sequence will + be added and treated as a single bit to be used in the same calculation as NORM_HAMMING. + */ + NORM_HAMMING2 = 7, + NORM_TYPE_MASK = 7, //!< bit-mask which can be used to separate norm type from norm flags + NORM_RELATIVE = 8, //!< flag + NORM_MINMAX = 32 //!< flag + }; + +//! comparison types +enum CmpTypes { CMP_EQ = 0, //!< src1 is equal to src2. + CMP_GT = 1, //!< src1 is greater than src2. + CMP_GE = 2, //!< src1 is greater than or equal to src2. + CMP_LT = 3, //!< src1 is less than src2. + CMP_LE = 4, //!< src1 is less than or equal to src2. + CMP_NE = 5 //!< src1 is unequal to src2. + }; + +//! generalized matrix multiplication flags +enum GemmFlags { GEMM_1_T = 1, //!< transposes src1 + GEMM_2_T = 2, //!< transposes src2 + GEMM_3_T = 4 //!< transposes src3 + }; + +enum DftFlags { + /** performs an inverse 1D or 2D transform instead of the default forward + transform. */ + DFT_INVERSE = 1, + /** scales the result: divide it by the number of array elements. Normally, it is + combined with DFT_INVERSE. */ + DFT_SCALE = 2, + /** performs a forward or inverse transform of every individual row of the input + matrix; this flag enables you to transform multiple vectors simultaneously and can be used to + decrease the overhead (which is sometimes several times larger than the processing itself) to + perform 3D and higher-dimensional transformations and so forth.*/ + DFT_ROWS = 4, + /** performs a forward transformation of 1D or 2D real array; the result, + though being a complex array, has complex-conjugate symmetry (*CCS*, see the function + description below for details), and such an array can be packed into a real array of the same + size as input, which is the fastest option and which is what the function does by default; + however, you may wish to get a full complex array (for simpler spectrum analysis, and so on) - + pass the flag to enable the function to produce a full-size complex output array. */ + DFT_COMPLEX_OUTPUT = 16, + /** performs an inverse transformation of a 1D or 2D complex array; the + result is normally a complex array of the same size, however, if the input array has + conjugate-complex symmetry (for example, it is a result of forward transformation with + DFT_COMPLEX_OUTPUT flag), the output is a real array; while the function itself does not + check whether the input is symmetrical or not, you can pass the flag and then the function + will assume the symmetry and produce the real output array (note that when the input is packed + into a real array and inverse transformation is executed, the function treats the input as a + packed complex-conjugate symmetrical array, and the output will also be a real array). */ + DFT_REAL_OUTPUT = 32, + /** specifies that input is complex input. If this flag is set, the input must have 2 channels. + On the other hand, for backwards compatibility reason, if input has 2 channels, input is + already considered complex. */ + DFT_COMPLEX_INPUT = 64, + /** performs an inverse 1D or 2D transform instead of the default forward transform. */ + DCT_INVERSE = DFT_INVERSE, + /** performs a forward or inverse transform of every individual row of the input + matrix. This flag enables you to transform multiple vectors simultaneously and can be used to + decrease the overhead (which is sometimes several times larger than the processing itself) to + perform 3D and higher-dimensional transforms and so forth.*/ + DCT_ROWS = DFT_ROWS +}; + +//! Various border types, image boundaries are denoted with `|` +//! @see borderInterpolate, copyMakeBorder +enum BorderTypes { + BORDER_CONSTANT = 0, //!< `iiiiii|abcdefgh|iiiiiii` with some specified `i` + BORDER_REPLICATE = 1, //!< `aaaaaa|abcdefgh|hhhhhhh` + BORDER_REFLECT = 2, //!< `fedcba|abcdefgh|hgfedcb` + BORDER_WRAP = 3, //!< `cdefgh|abcdefgh|abcdefg` + BORDER_REFLECT_101 = 4, //!< `gfedcb|abcdefgh|gfedcba` + BORDER_TRANSPARENT = 5, //!< `uvwxyz|abcdefgh|ijklmno` + + BORDER_REFLECT101 = BORDER_REFLECT_101, //!< same as BORDER_REFLECT_101 + BORDER_DEFAULT = BORDER_REFLECT_101, //!< same as BORDER_REFLECT_101 + BORDER_ISOLATED = 16 //!< do not look outside of ROI +}; + +//! @} core_array + +//! @addtogroup core_utils +//! @{ + +/*! @brief Signals an error and raises the exception. + +By default the function prints information about the error to stderr, +then it either stops if setBreakOnError() had been called before or raises the exception. +It is possible to alternate error processing by using redirectError(). +@param _code - error code (Error::Code) +@param _err - error description +@param _func - function name. Available only when the compiler supports getting it +@param _file - source file name where the error has occurred +@param _line - line number in the source file where the error has occurred +@see CV_Error, CV_Error_, CV_Assert, CV_DbgAssert + */ +CV_EXPORTS CV_NORETURN void error(int _code, const String& _err, const char* _func, const char* _file, int _line); + +#ifdef CV_STATIC_ANALYSIS + +// In practice, some macro are not processed correctly (noreturn is not detected). +// We need to use simplified definition for them. +#define CV_Error(code, msg) do { (void)(code); (void)(msg); abort(); } while (0) +#define CV_Error_(code, args) do { (void)(code); (void)(cv::format args); abort(); } while (0) +#define CV_Assert( expr ) do { if (!(expr)) abort(); } while (0) + +#else // CV_STATIC_ANALYSIS + +/** @brief Call the error handler. + +Currently, the error handler prints the error code and the error message to the standard +error stream `stderr`. In the Debug configuration, it then provokes memory access violation, so that +the execution stack and all the parameters can be analyzed by the debugger. In the Release +configuration, the exception is thrown. + +@param code one of Error::Code +@param msg error message +*/ +#define CV_Error( code, msg ) cv::error( code, msg, CV_Func, __FILE__, __LINE__ ) + +/** @brief Call the error handler. + +This macro can be used to construct an error message on-fly to include some dynamic information, +for example: +@code + // note the extra parentheses around the formatted text message + CV_Error_(Error::StsOutOfRange, + ("the value at (%d, %d)=%g is out of range", badPt.x, badPt.y, badValue)); +@endcode +@param code one of Error::Code +@param args printf-like formatted error message in parentheses +*/ +#define CV_Error_( code, args ) cv::error( code, cv::format args, CV_Func, __FILE__, __LINE__ ) + +/** @brief Checks a condition at runtime and throws exception if it fails + +The macros CV_Assert (and CV_DbgAssert(expr)) evaluate the specified expression. If it is 0, the macros +raise an error (see cv::error). The macro CV_Assert checks the condition in both Debug and Release +configurations while CV_DbgAssert is only retained in the Debug configuration. +*/ +#define CV_Assert( expr ) do { if(!!(expr)) ; else cv::error( cv::Error::StsAssert, #expr, CV_Func, __FILE__, __LINE__ ); } while(0) + +#endif // CV_STATIC_ANALYSIS + +//! @cond IGNORED +#if !defined(__OPENCV_BUILD) // TODO: backward compatibility only +#ifndef CV_ErrorNoReturn +#define CV_ErrorNoReturn CV_Error +#endif +#ifndef CV_ErrorNoReturn_ +#define CV_ErrorNoReturn_ CV_Error_ +#endif +#endif + +#define CV_Assert_1 CV_Assert +#define CV_Assert_2( expr, ... ) CV_Assert_1(expr); __CV_EXPAND(CV_Assert_1( __VA_ARGS__ )) +#define CV_Assert_3( expr, ... ) CV_Assert_1(expr); __CV_EXPAND(CV_Assert_2( __VA_ARGS__ )) +#define CV_Assert_4( expr, ... ) CV_Assert_1(expr); __CV_EXPAND(CV_Assert_3( __VA_ARGS__ )) +#define CV_Assert_5( expr, ... ) CV_Assert_1(expr); __CV_EXPAND(CV_Assert_4( __VA_ARGS__ )) +#define CV_Assert_6( expr, ... ) CV_Assert_1(expr); __CV_EXPAND(CV_Assert_5( __VA_ARGS__ )) +#define CV_Assert_7( expr, ... ) CV_Assert_1(expr); __CV_EXPAND(CV_Assert_6( __VA_ARGS__ )) +#define CV_Assert_8( expr, ... ) CV_Assert_1(expr); __CV_EXPAND(CV_Assert_7( __VA_ARGS__ )) +#define CV_Assert_9( expr, ... ) CV_Assert_1(expr); __CV_EXPAND(CV_Assert_8( __VA_ARGS__ )) +#define CV_Assert_10( expr, ... ) CV_Assert_1(expr); __CV_EXPAND(CV_Assert_9( __VA_ARGS__ )) + +#define CV_Assert_N(...) do { __CV_EXPAND(__CV_CAT(CV_Assert_, __CV_VA_NUM_ARGS(__VA_ARGS__)) (__VA_ARGS__)); } while(0) + +//! @endcond + +#if defined _DEBUG || defined CV_STATIC_ANALYSIS +# define CV_DbgAssert(expr) CV_Assert(expr) +#else +/** replaced with CV_Assert(expr) in Debug configuration */ +# define CV_DbgAssert(expr) +#endif + +/* + * Hamming distance functor - counts the bit differences between two strings - useful for the Brief descriptor + * bit count of A exclusive XOR'ed with B + */ +struct CV_EXPORTS Hamming +{ + static const NormTypes normType = NORM_HAMMING; + typedef unsigned char ValueType; + typedef int ResultType; + + /** this will count the bits in a ^ b + */ + ResultType operator()( const unsigned char* a, const unsigned char* b, int size ) const; +}; + +typedef Hamming HammingLUT; + +/////////////////////////////////// inline norms //////////////////////////////////// + +template inline _Tp cv_abs(_Tp x) { return std::abs(x); } +inline int cv_abs(uchar x) { return x; } +inline int cv_abs(schar x) { return std::abs(x); } +inline int cv_abs(ushort x) { return x; } +inline int cv_abs(short x) { return std::abs(x); } + +template static inline +_AccTp normL2Sqr(const _Tp* a, int n) +{ + _AccTp s = 0; + int i=0; +#if CV_ENABLE_UNROLLED + for( ; i <= n - 4; i += 4 ) + { + _AccTp v0 = a[i], v1 = a[i+1], v2 = a[i+2], v3 = a[i+3]; + s += v0*v0 + v1*v1 + v2*v2 + v3*v3; + } +#endif + for( ; i < n; i++ ) + { + _AccTp v = a[i]; + s += v*v; + } + return s; +} + +template static inline +_AccTp normL1(const _Tp* a, int n) +{ + _AccTp s = 0; + int i = 0; +#if CV_ENABLE_UNROLLED + for(; i <= n - 4; i += 4 ) + { + s += (_AccTp)cv_abs(a[i]) + (_AccTp)cv_abs(a[i+1]) + + (_AccTp)cv_abs(a[i+2]) + (_AccTp)cv_abs(a[i+3]); + } +#endif + for( ; i < n; i++ ) + s += cv_abs(a[i]); + return s; +} + +template static inline +_AccTp normInf(const _Tp* a, int n) +{ + _AccTp s = 0; + for( int i = 0; i < n; i++ ) + s = std::max(s, (_AccTp)cv_abs(a[i])); + return s; +} + +template static inline +_AccTp normL2Sqr(const _Tp* a, const _Tp* b, int n) +{ + _AccTp s = 0; + int i= 0; +#if CV_ENABLE_UNROLLED + for(; i <= n - 4; i += 4 ) + { + _AccTp v0 = _AccTp(a[i] - b[i]), v1 = _AccTp(a[i+1] - b[i+1]), v2 = _AccTp(a[i+2] - b[i+2]), v3 = _AccTp(a[i+3] - b[i+3]); + s += v0*v0 + v1*v1 + v2*v2 + v3*v3; + } +#endif + for( ; i < n; i++ ) + { + _AccTp v = _AccTp(a[i] - b[i]); + s += v*v; + } + return s; +} + +static inline float normL2Sqr(const float* a, const float* b, int n) +{ + float s = 0.f; + for( int i = 0; i < n; i++ ) + { + float v = a[i] - b[i]; + s += v*v; + } + return s; +} + +template static inline +_AccTp normL1(const _Tp* a, const _Tp* b, int n) +{ + _AccTp s = 0; + int i= 0; +#if CV_ENABLE_UNROLLED + for(; i <= n - 4; i += 4 ) + { + _AccTp v0 = _AccTp(a[i] - b[i]), v1 = _AccTp(a[i+1] - b[i+1]), v2 = _AccTp(a[i+2] - b[i+2]), v3 = _AccTp(a[i+3] - b[i+3]); + s += std::abs(v0) + std::abs(v1) + std::abs(v2) + std::abs(v3); + } +#endif + for( ; i < n; i++ ) + { + _AccTp v = _AccTp(a[i] - b[i]); + s += std::abs(v); + } + return s; +} + +inline float normL1(const float* a, const float* b, int n) +{ + float s = 0.f; + for( int i = 0; i < n; i++ ) + { + s += std::abs(a[i] - b[i]); + } + return s; +} + +inline int normL1(const uchar* a, const uchar* b, int n) +{ + int s = 0; + for( int i = 0; i < n; i++ ) + { + s += std::abs(a[i] - b[i]); + } + return s; +} + +template static inline +_AccTp normInf(const _Tp* a, const _Tp* b, int n) +{ + _AccTp s = 0; + for( int i = 0; i < n; i++ ) + { + _AccTp v0 = a[i] - b[i]; + s = std::max(s, std::abs(v0)); + } + return s; +} + +/** @brief Computes the cube root of an argument. + + The function cubeRoot computes \f$\sqrt[3]{\texttt{val}}\f$. Negative arguments are handled correctly. + NaN and Inf are not handled. The accuracy approaches the maximum possible accuracy for + single-precision data. + @param val A function argument. + */ +CV_EXPORTS_W float cubeRoot(float val); + +/** @brief Calculates the angle of a 2D vector in degrees. + + The function fastAtan2 calculates the full-range angle of an input 2D vector. The angle is measured + in degrees and varies from 0 to 360 degrees. The accuracy is about 0.3 degrees. + @param x x-coordinate of the vector. + @param y y-coordinate of the vector. + */ +CV_EXPORTS_W float fastAtan2(float y, float x); + +/** proxy for hal::LU */ +CV_EXPORTS int LU(float* A, size_t astep, int m, float* b, size_t bstep, int n); +/** proxy for hal::LU */ +CV_EXPORTS int LU(double* A, size_t astep, int m, double* b, size_t bstep, int n); +/** proxy for hal::Cholesky */ +CV_EXPORTS bool Cholesky(float* A, size_t astep, int m, float* b, size_t bstep, int n); +/** proxy for hal::Cholesky */ +CV_EXPORTS bool Cholesky(double* A, size_t astep, int m, double* b, size_t bstep, int n); + +////////////////// forward declarations for important OpenCV types ////////////////// + +//! @cond IGNORED + +template class Vec; +template class Matx; + +template class Complex; +template class Point_; +template class Point3_; +template class Size_; +template class Rect_; +template class Scalar_; + +class CV_EXPORTS RotatedRect; +class CV_EXPORTS Range; +class CV_EXPORTS TermCriteria; +class CV_EXPORTS KeyPoint; +class CV_EXPORTS DMatch; +class CV_EXPORTS RNG; + +class CV_EXPORTS Mat; +class CV_EXPORTS MatExpr; + +class CV_EXPORTS UMat; + +class CV_EXPORTS SparseMat; +typedef Mat MatND; + +template class Mat_; +template class SparseMat_; + +class CV_EXPORTS MatConstIterator; +class CV_EXPORTS SparseMatIterator; +class CV_EXPORTS SparseMatConstIterator; +template class MatIterator_; +template class MatConstIterator_; +template class SparseMatIterator_; +template class SparseMatConstIterator_; + +namespace ogl +{ + class CV_EXPORTS Buffer; + class CV_EXPORTS Texture2D; + class CV_EXPORTS Arrays; +} + +namespace cuda +{ + class CV_EXPORTS GpuMat; + class CV_EXPORTS HostMem; + class CV_EXPORTS Stream; + class CV_EXPORTS Event; +} + +namespace cudev +{ + template class GpuMat_; +} + +namespace ipp +{ +CV_EXPORTS unsigned long long getIppFeatures(); +CV_EXPORTS void setIppStatus(int status, const char * const funcname = NULL, const char * const filename = NULL, + int line = 0); +CV_EXPORTS int getIppStatus(); +CV_EXPORTS String getIppErrorLocation(); +CV_EXPORTS_W bool useIPP(); +CV_EXPORTS_W void setUseIPP(bool flag); +CV_EXPORTS_W String getIppVersion(); + +// IPP Not-Exact mode. This function may force use of IPP then both IPP and OpenCV provide proper results +// but have internal accuracy differences which have too much direct or indirect impact on accuracy tests. +CV_EXPORTS_W bool useIPP_NotExact(); +CV_EXPORTS_W void setUseIPP_NotExact(bool flag); +#ifndef DISABLE_OPENCV_3_COMPATIBILITY +static inline bool useIPP_NE() { return useIPP_NotExact(); } +static inline void setUseIPP_NE(bool flag) { setUseIPP_NotExact(flag); } +#endif + +} // ipp + +//! @endcond + +//! @} core_utils + + + + +} // cv + +#include "opencv2/core/neon_utils.hpp" +#include "opencv2/core/vsx_utils.hpp" +#include "opencv2/core/check.hpp" + +#endif //OPENCV_CORE_BASE_HPP diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/bindings_utils.hpp b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/bindings_utils.hpp new file mode 100644 index 0000000..474e23a --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/bindings_utils.hpp @@ -0,0 +1,85 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#ifndef OPENCV_CORE_BINDINGS_UTILS_HPP +#define OPENCV_CORE_BINDINGS_UTILS_HPP + +#include +#include + +namespace cv { namespace utils { +//! @addtogroup core_utils +//! @{ + +CV_EXPORTS_W String dumpInputArray(InputArray argument); + +CV_EXPORTS_W String dumpInputArrayOfArrays(InputArrayOfArrays argument); + +CV_EXPORTS_W String dumpInputOutputArray(InputOutputArray argument); + +CV_EXPORTS_W String dumpInputOutputArrayOfArrays(InputOutputArrayOfArrays argument); + +CV_WRAP static inline +String dumpBool(bool argument) +{ + return (argument) ? String("Bool: True") : String("Bool: False"); +} + +CV_WRAP static inline +String dumpInt(int argument) +{ + return cv::format("Int: %d", argument); +} + +CV_WRAP static inline +String dumpSizeT(size_t argument) +{ + std::ostringstream oss("size_t: ", std::ios::ate); + oss << argument; + return oss.str(); +} + +CV_WRAP static inline +String dumpFloat(float argument) +{ + return cv::format("Float: %.2f", argument); +} + +CV_WRAP static inline +String dumpDouble(double argument) +{ + return cv::format("Double: %.2f", argument); +} + +CV_WRAP static inline +String dumpCString(const char* argument) +{ + return cv::format("String: %s", argument); +} + +CV_WRAP static inline +String dumpString(const String& argument) +{ + return cv::format("String: %s", argument.c_str()); +} + +CV_WRAP static inline +AsyncArray testAsyncArray(InputArray argument) +{ + AsyncPromise p; + p.setValue(argument); + return p.getArrayResult(); +} + +CV_WRAP static inline +AsyncArray testAsyncException() +{ + AsyncPromise p; + return p.getArrayResult(); +} + +//! @} +}} // namespace + +#endif // OPENCV_CORE_BINDINGS_UTILS_HPP diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/bufferpool.hpp b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/bufferpool.hpp new file mode 100644 index 0000000..4698e5d --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/bufferpool.hpp @@ -0,0 +1,40 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2014, Advanced Micro Devices, Inc., all rights reserved. + +#ifndef OPENCV_CORE_BUFFER_POOL_HPP +#define OPENCV_CORE_BUFFER_POOL_HPP + +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable: 4265) +#endif + +namespace cv +{ + +//! @addtogroup core +//! @{ + +class BufferPoolController +{ +protected: + ~BufferPoolController() { } +public: + virtual size_t getReservedSize() const = 0; + virtual size_t getMaxReservedSize() const = 0; + virtual void setMaxReservedSize(size_t size) = 0; + virtual void freeAllReservedBuffers() = 0; +}; + +//! @} + +} + +#ifdef _MSC_VER +#pragma warning(pop) +#endif + +#endif // OPENCV_CORE_BUFFER_POOL_HPP diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/check.hpp b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/check.hpp new file mode 100644 index 0000000..d975223 --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/check.hpp @@ -0,0 +1,160 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#ifndef OPENCV_CORE_CHECK_HPP +#define OPENCV_CORE_CHECK_HPP + +#include + +namespace cv { + +/** Returns string of cv::Mat depth value: CV_8U -> "CV_8U" or "" */ +CV_EXPORTS const char* depthToString(int depth); + +/** Returns string of cv::Mat depth value: CV_8UC3 -> "CV_8UC3" or "" */ +CV_EXPORTS const String typeToString(int type); + + +//! @cond IGNORED +namespace detail { + +/** Returns string of cv::Mat depth value: CV_8U -> "CV_8U" or NULL */ +CV_EXPORTS const char* depthToString_(int depth); + +/** Returns string of cv::Mat depth value: CV_8UC3 -> "CV_8UC3" or cv::String() */ +CV_EXPORTS const cv::String typeToString_(int type); + +enum TestOp { + TEST_CUSTOM = 0, + TEST_EQ = 1, + TEST_NE = 2, + TEST_LE = 3, + TEST_LT = 4, + TEST_GE = 5, + TEST_GT = 6, + CV__LAST_TEST_OP +}; + +struct CheckContext { + const char* func; + const char* file; + int line; + enum TestOp testOp; + const char* message; + const char* p1_str; + const char* p2_str; +}; + +#ifndef CV__CHECK_FILENAME +# define CV__CHECK_FILENAME __FILE__ +#endif + +#ifndef CV__CHECK_FUNCTION +# if defined _MSC_VER +# define CV__CHECK_FUNCTION __FUNCSIG__ +# elif defined __GNUC__ +# define CV__CHECK_FUNCTION __PRETTY_FUNCTION__ +# else +# define CV__CHECK_FUNCTION "" +# endif +#endif + +#define CV__CHECK_LOCATION_VARNAME(id) CVAUX_CONCAT(CVAUX_CONCAT(__cv_check_, id), __LINE__) +#define CV__DEFINE_CHECK_CONTEXT(id, message, testOp, p1_str, p2_str) \ + static const cv::detail::CheckContext CV__CHECK_LOCATION_VARNAME(id) = \ + { CV__CHECK_FUNCTION, CV__CHECK_FILENAME, __LINE__, testOp, "" message, "" p1_str, "" p2_str } + +CV_EXPORTS void CV_NORETURN check_failed_auto(const int v1, const int v2, const CheckContext& ctx); +CV_EXPORTS void CV_NORETURN check_failed_auto(const size_t v1, const size_t v2, const CheckContext& ctx); +CV_EXPORTS void CV_NORETURN check_failed_auto(const float v1, const float v2, const CheckContext& ctx); +CV_EXPORTS void CV_NORETURN check_failed_auto(const double v1, const double v2, const CheckContext& ctx); +CV_EXPORTS void CV_NORETURN check_failed_auto(const Size_ v1, const Size_ v2, const CheckContext& ctx); +CV_EXPORTS void CV_NORETURN check_failed_MatDepth(const int v1, const int v2, const CheckContext& ctx); +CV_EXPORTS void CV_NORETURN check_failed_MatType(const int v1, const int v2, const CheckContext& ctx); +CV_EXPORTS void CV_NORETURN check_failed_MatChannels(const int v1, const int v2, const CheckContext& ctx); + +CV_EXPORTS void CV_NORETURN check_failed_auto(const int v, const CheckContext& ctx); +CV_EXPORTS void CV_NORETURN check_failed_auto(const size_t v, const CheckContext& ctx); +CV_EXPORTS void CV_NORETURN check_failed_auto(const float v, const CheckContext& ctx); +CV_EXPORTS void CV_NORETURN check_failed_auto(const double v, const CheckContext& ctx); +CV_EXPORTS void CV_NORETURN check_failed_auto(const Size_ v, const CheckContext& ctx); +CV_EXPORTS void CV_NORETURN check_failed_auto(const std::string& v1, const CheckContext& ctx); +CV_EXPORTS void CV_NORETURN check_failed_MatDepth(const int v, const CheckContext& ctx); +CV_EXPORTS void CV_NORETURN check_failed_MatType(const int v, const CheckContext& ctx); +CV_EXPORTS void CV_NORETURN check_failed_MatChannels(const int v, const CheckContext& ctx); + + +#define CV__TEST_EQ(v1, v2) ((v1) == (v2)) +#define CV__TEST_NE(v1, v2) ((v1) != (v2)) +#define CV__TEST_LE(v1, v2) ((v1) <= (v2)) +#define CV__TEST_LT(v1, v2) ((v1) < (v2)) +#define CV__TEST_GE(v1, v2) ((v1) >= (v2)) +#define CV__TEST_GT(v1, v2) ((v1) > (v2)) + +#define CV__CHECK(id, op, type, v1, v2, v1_str, v2_str, msg_str) do { \ + if(CV__TEST_##op((v1), (v2))) ; else { \ + CV__DEFINE_CHECK_CONTEXT(id, msg_str, cv::detail::TEST_ ## op, v1_str, v2_str); \ + cv::detail::check_failed_ ## type((v1), (v2), CV__CHECK_LOCATION_VARNAME(id)); \ + } \ +} while (0) + +#define CV__CHECK_CUSTOM_TEST(id, type, v, test_expr, v_str, test_expr_str, msg_str) do { \ + if(!!(test_expr)) ; else { \ + CV__DEFINE_CHECK_CONTEXT(id, msg_str, cv::detail::TEST_CUSTOM, v_str, test_expr_str); \ + cv::detail::check_failed_ ## type((v), CV__CHECK_LOCATION_VARNAME(id)); \ + } \ +} while (0) + +} // namespace +//! @endcond + + +/// Supported values of these types: int, float, double +#define CV_CheckEQ(v1, v2, msg) CV__CHECK(_, EQ, auto, v1, v2, #v1, #v2, msg) +#define CV_CheckNE(v1, v2, msg) CV__CHECK(_, NE, auto, v1, v2, #v1, #v2, msg) +#define CV_CheckLE(v1, v2, msg) CV__CHECK(_, LE, auto, v1, v2, #v1, #v2, msg) +#define CV_CheckLT(v1, v2, msg) CV__CHECK(_, LT, auto, v1, v2, #v1, #v2, msg) +#define CV_CheckGE(v1, v2, msg) CV__CHECK(_, GE, auto, v1, v2, #v1, #v2, msg) +#define CV_CheckGT(v1, v2, msg) CV__CHECK(_, GT, auto, v1, v2, #v1, #v2, msg) + +/// Check with additional "decoding" of type values in error message +#define CV_CheckTypeEQ(t1, t2, msg) CV__CHECK(_, EQ, MatType, t1, t2, #t1, #t2, msg) +/// Check with additional "decoding" of depth values in error message +#define CV_CheckDepthEQ(d1, d2, msg) CV__CHECK(_, EQ, MatDepth, d1, d2, #d1, #d2, msg) + +#define CV_CheckChannelsEQ(c1, c2, msg) CV__CHECK(_, EQ, MatChannels, c1, c2, #c1, #c2, msg) + +/// Example: type == CV_8UC1 || type == CV_8UC3 +#define CV_CheckType(t, test_expr, msg) CV__CHECK_CUSTOM_TEST(_, MatType, t, (test_expr), #t, #test_expr, msg) + +/// Example: depth == CV_32F || depth == CV_64F +#define CV_CheckDepth(t, test_expr, msg) CV__CHECK_CUSTOM_TEST(_, MatDepth, t, (test_expr), #t, #test_expr, msg) + +/// Example: v == A || v == B +#define CV_Check(v, test_expr, msg) CV__CHECK_CUSTOM_TEST(_, auto, v, (test_expr), #v, #test_expr, msg) + +/// Some complex conditions: CV_Check(src2, src2.empty() || (src2.type() == src1.type() && src2.size() == src1.size()), "src2 should have same size/type as src1") +// TODO define pretty-printers + +#ifndef NDEBUG +#define CV_DbgCheck(v, test_expr, msg) CV__CHECK_CUSTOM_TEST(_, auto, v, (test_expr), #v, #test_expr, msg) +#define CV_DbgCheckEQ(v1, v2, msg) CV__CHECK(_, EQ, auto, v1, v2, #v1, #v2, msg) +#define CV_DbgCheckNE(v1, v2, msg) CV__CHECK(_, NE, auto, v1, v2, #v1, #v2, msg) +#define CV_DbgCheckLE(v1, v2, msg) CV__CHECK(_, LE, auto, v1, v2, #v1, #v2, msg) +#define CV_DbgCheckLT(v1, v2, msg) CV__CHECK(_, LT, auto, v1, v2, #v1, #v2, msg) +#define CV_DbgCheckGE(v1, v2, msg) CV__CHECK(_, GE, auto, v1, v2, #v1, #v2, msg) +#define CV_DbgCheckGT(v1, v2, msg) CV__CHECK(_, GT, auto, v1, v2, #v1, #v2, msg) +#else +#define CV_DbgCheck(v, test_expr, msg) do { } while (0) +#define CV_DbgCheckEQ(v1, v2, msg) do { } while (0) +#define CV_DbgCheckNE(v1, v2, msg) do { } while (0) +#define CV_DbgCheckLE(v1, v2, msg) do { } while (0) +#define CV_DbgCheckLT(v1, v2, msg) do { } while (0) +#define CV_DbgCheckGE(v1, v2, msg) do { } while (0) +#define CV_DbgCheckGT(v1, v2, msg) do { } while (0) +#endif + +} // namespace + +#endif // OPENCV_CORE_CHECK_HPP diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/core.hpp b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/core.hpp new file mode 100644 index 0000000..4389183 --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/core.hpp @@ -0,0 +1,48 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Copyright (C) 2013, OpenCV Foundation, all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifdef __OPENCV_BUILD +#error this is a compatibility header which should not be used inside the OpenCV library +#endif + +#include "opencv2/core.hpp" diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/core_c.h b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/core_c.h new file mode 100644 index 0000000..09ac1e7 --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/core_c.h @@ -0,0 +1,3125 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Copyright (C) 2013, OpenCV Foundation, all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + + +#ifndef OPENCV_CORE_C_H +#define OPENCV_CORE_C_H + +#include "opencv2/core/types_c.h" + +#ifdef __cplusplus +# ifdef _MSC_VER +/* disable warning C4190: 'function' has C-linkage specified, but returns UDT 'typename' + which is incompatible with C + + It is OK to disable it because we only extend few plain structures with + C++ constructors for simpler interoperability with C++ API of the library +*/ +# pragma warning(disable:4190) +# elif defined __clang__ && __clang_major__ >= 3 +# pragma GCC diagnostic ignored "-Wreturn-type-c-linkage" +# endif +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +/** @addtogroup core_c + @{ +*/ + +/****************************************************************************************\ +* Array allocation, deallocation, initialization and access to elements * +\****************************************************************************************/ + +/** `malloc` wrapper. + If there is no enough memory, the function + (as well as other OpenCV functions that call cvAlloc) + raises an error. */ +CVAPI(void*) cvAlloc( size_t size ); + +/** `free` wrapper. + Here and further all the memory releasing functions + (that all call cvFree) take double pointer in order to + to clear pointer to the data after releasing it. + Passing pointer to NULL pointer is Ok: nothing happens in this case +*/ +CVAPI(void) cvFree_( void* ptr ); +#define cvFree(ptr) (cvFree_(*(ptr)), *(ptr)=0) + +/** @brief Creates an image header but does not allocate the image data. + +@param size Image width and height +@param depth Image depth (see cvCreateImage ) +@param channels Number of channels (see cvCreateImage ) + */ +CVAPI(IplImage*) cvCreateImageHeader( CvSize size, int depth, int channels ); + +/** @brief Initializes an image header that was previously allocated. + +The returned IplImage\* points to the initialized header. +@param image Image header to initialize +@param size Image width and height +@param depth Image depth (see cvCreateImage ) +@param channels Number of channels (see cvCreateImage ) +@param origin Top-left IPL_ORIGIN_TL or bottom-left IPL_ORIGIN_BL +@param align Alignment for image rows, typically 4 or 8 bytes + */ +CVAPI(IplImage*) cvInitImageHeader( IplImage* image, CvSize size, int depth, + int channels, int origin CV_DEFAULT(0), + int align CV_DEFAULT(4)); + +/** @brief Creates an image header and allocates the image data. + +This function call is equivalent to the following code: +@code + header = cvCreateImageHeader(size, depth, channels); + cvCreateData(header); +@endcode +@param size Image width and height +@param depth Bit depth of image elements. See IplImage for valid depths. +@param channels Number of channels per pixel. See IplImage for details. This function only creates +images with interleaved channels. + */ +CVAPI(IplImage*) cvCreateImage( CvSize size, int depth, int channels ); + +/** @brief Deallocates an image header. + +This call is an analogue of : +@code + if(image ) + { + iplDeallocate(*image, IPL_IMAGE_HEADER | IPL_IMAGE_ROI); + *image = 0; + } +@endcode +but it does not use IPL functions by default (see the CV_TURN_ON_IPL_COMPATIBILITY macro). +@param image Double pointer to the image header + */ +CVAPI(void) cvReleaseImageHeader( IplImage** image ); + +/** @brief Deallocates the image header and the image data. + +This call is a shortened form of : +@code + if(*image ) + { + cvReleaseData(*image); + cvReleaseImageHeader(image); + } +@endcode +@param image Double pointer to the image header +*/ +CVAPI(void) cvReleaseImage( IplImage** image ); + +/** Creates a copy of IPL image (widthStep may differ) */ +CVAPI(IplImage*) cvCloneImage( const IplImage* image ); + +/** @brief Sets the channel of interest in an IplImage. + +If the ROI is set to NULL and the coi is *not* 0, the ROI is allocated. Most OpenCV functions do +*not* support the COI setting, so to process an individual image/matrix channel one may copy (via +cvCopy or cvSplit) the channel to a separate image/matrix, process it and then copy the result +back (via cvCopy or cvMerge) if needed. +@param image A pointer to the image header +@param coi The channel of interest. 0 - all channels are selected, 1 - first channel is selected, +etc. Note that the channel indices become 1-based. + */ +CVAPI(void) cvSetImageCOI( IplImage* image, int coi ); + +/** @brief Returns the index of the channel of interest. + +Returns the channel of interest of in an IplImage. Returned values correspond to the coi in +cvSetImageCOI. +@param image A pointer to the image header + */ +CVAPI(int) cvGetImageCOI( const IplImage* image ); + +/** @brief Sets an image Region Of Interest (ROI) for a given rectangle. + +If the original image ROI was NULL and the rect is not the whole image, the ROI structure is +allocated. + +Most OpenCV functions support the use of ROI and treat the image rectangle as a separate image. For +example, all of the pixel coordinates are counted from the top-left (or bottom-left) corner of the +ROI, not the original image. +@param image A pointer to the image header +@param rect The ROI rectangle + */ +CVAPI(void) cvSetImageROI( IplImage* image, CvRect rect ); + +/** @brief Resets the image ROI to include the entire image and releases the ROI structure. + +This produces a similar result to the following, but in addition it releases the ROI structure. : +@code + cvSetImageROI(image, cvRect(0, 0, image->width, image->height )); + cvSetImageCOI(image, 0); +@endcode +@param image A pointer to the image header + */ +CVAPI(void) cvResetImageROI( IplImage* image ); + +/** @brief Returns the image ROI. + +If there is no ROI set, cvRect(0,0,image-\>width,image-\>height) is returned. +@param image A pointer to the image header + */ +CVAPI(CvRect) cvGetImageROI( const IplImage* image ); + +/** @brief Creates a matrix header but does not allocate the matrix data. + +The function allocates a new matrix header and returns a pointer to it. The matrix data can then be +allocated using cvCreateData or set explicitly to user-allocated data via cvSetData. +@param rows Number of rows in the matrix +@param cols Number of columns in the matrix +@param type Type of the matrix elements, see cvCreateMat + */ +CVAPI(CvMat*) cvCreateMatHeader( int rows, int cols, int type ); + +#define CV_AUTOSTEP 0x7fffffff + +/** @brief Initializes a pre-allocated matrix header. + +This function is often used to process raw data with OpenCV matrix functions. For example, the +following code computes the matrix product of two matrices, stored as ordinary arrays: +@code + double a[] = { 1, 2, 3, 4, + 5, 6, 7, 8, + 9, 10, 11, 12 }; + + double b[] = { 1, 5, 9, + 2, 6, 10, + 3, 7, 11, + 4, 8, 12 }; + + double c[9]; + CvMat Ma, Mb, Mc ; + + cvInitMatHeader(&Ma, 3, 4, CV_64FC1, a); + cvInitMatHeader(&Mb, 4, 3, CV_64FC1, b); + cvInitMatHeader(&Mc, 3, 3, CV_64FC1, c); + + cvMatMulAdd(&Ma, &Mb, 0, &Mc); + // the c array now contains the product of a (3x4) and b (4x3) +@endcode +@param mat A pointer to the matrix header to be initialized +@param rows Number of rows in the matrix +@param cols Number of columns in the matrix +@param type Type of the matrix elements, see cvCreateMat . +@param data Optional: data pointer assigned to the matrix header +@param step Optional: full row width in bytes of the assigned data. By default, the minimal +possible step is used which assumes there are no gaps between subsequent rows of the matrix. + */ +CVAPI(CvMat*) cvInitMatHeader( CvMat* mat, int rows, int cols, + int type, void* data CV_DEFAULT(NULL), + int step CV_DEFAULT(CV_AUTOSTEP) ); + +/** @brief Creates a matrix header and allocates the matrix data. + +The function call is equivalent to the following code: +@code + CvMat* mat = cvCreateMatHeader(rows, cols, type); + cvCreateData(mat); +@endcode +@param rows Number of rows in the matrix +@param cols Number of columns in the matrix +@param type The type of the matrix elements in the form +CV_\\C\ , where S=signed, U=unsigned, F=float. For +example, CV _ 8UC1 means the elements are 8-bit unsigned and the there is 1 channel, and CV _ +32SC2 means the elements are 32-bit signed and there are 2 channels. + */ +CVAPI(CvMat*) cvCreateMat( int rows, int cols, int type ); + +/** @brief Deallocates a matrix. + +The function decrements the matrix data reference counter and deallocates matrix header. If the data +reference counter is 0, it also deallocates the data. : +@code + if(*mat ) + cvDecRefData(*mat); + cvFree((void**)mat); +@endcode +@param mat Double pointer to the matrix + */ +CVAPI(void) cvReleaseMat( CvMat** mat ); + +/** @brief Decrements an array data reference counter. + +The function decrements the data reference counter in a CvMat or CvMatND if the reference counter + +pointer is not NULL. If the counter reaches zero, the data is deallocated. In the current +implementation the reference counter is not NULL only if the data was allocated using the +cvCreateData function. The counter will be NULL in other cases such as: external data was assigned +to the header using cvSetData, header is part of a larger matrix or image, or the header was +converted from an image or n-dimensional matrix header. +@param arr Pointer to an array header + */ +CV_INLINE void cvDecRefData( CvArr* arr ) +{ + if( CV_IS_MAT( arr )) + { + CvMat* mat = (CvMat*)arr; + mat->data.ptr = NULL; + if( mat->refcount != NULL && --*mat->refcount == 0 ) + cvFree( &mat->refcount ); + mat->refcount = NULL; + } + else if( CV_IS_MATND( arr )) + { + CvMatND* mat = (CvMatND*)arr; + mat->data.ptr = NULL; + if( mat->refcount != NULL && --*mat->refcount == 0 ) + cvFree( &mat->refcount ); + mat->refcount = NULL; + } +} + +/** @brief Increments array data reference counter. + +The function increments CvMat or CvMatND data reference counter and returns the new counter value if +the reference counter pointer is not NULL, otherwise it returns zero. +@param arr Array header + */ +CV_INLINE int cvIncRefData( CvArr* arr ) +{ + int refcount = 0; + if( CV_IS_MAT( arr )) + { + CvMat* mat = (CvMat*)arr; + if( mat->refcount != NULL ) + refcount = ++*mat->refcount; + } + else if( CV_IS_MATND( arr )) + { + CvMatND* mat = (CvMatND*)arr; + if( mat->refcount != NULL ) + refcount = ++*mat->refcount; + } + return refcount; +} + + +/** Creates an exact copy of the input matrix (except, may be, step value) */ +CVAPI(CvMat*) cvCloneMat( const CvMat* mat ); + + +/** @brief Returns matrix header corresponding to the rectangular sub-array of input image or matrix. + +The function returns header, corresponding to a specified rectangle of the input array. In other + +words, it allows the user to treat a rectangular part of input array as a stand-alone array. ROI is +taken into account by the function so the sub-array of ROI is actually extracted. +@param arr Input array +@param submat Pointer to the resultant sub-array header +@param rect Zero-based coordinates of the rectangle of interest + */ +CVAPI(CvMat*) cvGetSubRect( const CvArr* arr, CvMat* submat, CvRect rect ); +#define cvGetSubArr cvGetSubRect + +/** @brief Returns array row or row span. + +The function returns the header, corresponding to a specified row/row span of the input array. +cvGetRow(arr, submat, row) is a shortcut for cvGetRows(arr, submat, row, row+1). +@param arr Input array +@param submat Pointer to the resulting sub-array header +@param start_row Zero-based index of the starting row (inclusive) of the span +@param end_row Zero-based index of the ending row (exclusive) of the span +@param delta_row Index step in the row span. That is, the function extracts every delta_row -th +row from start_row and up to (but not including) end_row . + */ +CVAPI(CvMat*) cvGetRows( const CvArr* arr, CvMat* submat, + int start_row, int end_row, + int delta_row CV_DEFAULT(1)); + +/** @overload +@param arr Input array +@param submat Pointer to the resulting sub-array header +@param row Zero-based index of the selected row +*/ +CV_INLINE CvMat* cvGetRow( const CvArr* arr, CvMat* submat, int row ) +{ + return cvGetRows( arr, submat, row, row + 1, 1 ); +} + + +/** @brief Returns one of more array columns. + +The function returns the header, corresponding to a specified column span of the input array. That + +is, no data is copied. Therefore, any modifications of the submatrix will affect the original array. +If you need to copy the columns, use cvCloneMat. cvGetCol(arr, submat, col) is a shortcut for +cvGetCols(arr, submat, col, col+1). +@param arr Input array +@param submat Pointer to the resulting sub-array header +@param start_col Zero-based index of the starting column (inclusive) of the span +@param end_col Zero-based index of the ending column (exclusive) of the span + */ +CVAPI(CvMat*) cvGetCols( const CvArr* arr, CvMat* submat, + int start_col, int end_col ); + +/** @overload +@param arr Input array +@param submat Pointer to the resulting sub-array header +@param col Zero-based index of the selected column +*/ +CV_INLINE CvMat* cvGetCol( const CvArr* arr, CvMat* submat, int col ) +{ + return cvGetCols( arr, submat, col, col + 1 ); +} + +/** @brief Returns one of array diagonals. + +The function returns the header, corresponding to a specified diagonal of the input array. +@param arr Input array +@param submat Pointer to the resulting sub-array header +@param diag Index of the array diagonal. Zero value corresponds to the main diagonal, -1 +corresponds to the diagonal above the main, 1 corresponds to the diagonal below the main, and so +forth. + */ +CVAPI(CvMat*) cvGetDiag( const CvArr* arr, CvMat* submat, + int diag CV_DEFAULT(0)); + +/** low-level scalar <-> raw data conversion functions */ +CVAPI(void) cvScalarToRawData( const CvScalar* scalar, void* data, int type, + int extend_to_12 CV_DEFAULT(0) ); + +CVAPI(void) cvRawDataToScalar( const void* data, int type, CvScalar* scalar ); + +/** @brief Creates a new matrix header but does not allocate the matrix data. + +The function allocates a header for a multi-dimensional dense array. The array data can further be +allocated using cvCreateData or set explicitly to user-allocated data via cvSetData. +@param dims Number of array dimensions +@param sizes Array of dimension sizes +@param type Type of array elements, see cvCreateMat + */ +CVAPI(CvMatND*) cvCreateMatNDHeader( int dims, const int* sizes, int type ); + +/** @brief Creates the header and allocates the data for a multi-dimensional dense array. + +This function call is equivalent to the following code: +@code + CvMatND* mat = cvCreateMatNDHeader(dims, sizes, type); + cvCreateData(mat); +@endcode +@param dims Number of array dimensions. This must not exceed CV_MAX_DIM (32 by default, but can be +changed at build time). +@param sizes Array of dimension sizes. +@param type Type of array elements, see cvCreateMat . + */ +CVAPI(CvMatND*) cvCreateMatND( int dims, const int* sizes, int type ); + +/** @brief Initializes a pre-allocated multi-dimensional array header. + +@param mat A pointer to the array header to be initialized +@param dims The number of array dimensions +@param sizes An array of dimension sizes +@param type Type of array elements, see cvCreateMat +@param data Optional data pointer assigned to the matrix header + */ +CVAPI(CvMatND*) cvInitMatNDHeader( CvMatND* mat, int dims, const int* sizes, + int type, void* data CV_DEFAULT(NULL) ); + +/** @brief Deallocates a multi-dimensional array. + +The function decrements the array data reference counter and releases the array header. If the +reference counter reaches 0, it also deallocates the data. : +@code + if(*mat ) + cvDecRefData(*mat); + cvFree((void**)mat); +@endcode +@param mat Double pointer to the array + */ +CV_INLINE void cvReleaseMatND( CvMatND** mat ) +{ + cvReleaseMat( (CvMat**)mat ); +} + +/** Creates a copy of CvMatND (except, may be, steps) */ +CVAPI(CvMatND*) cvCloneMatND( const CvMatND* mat ); + +/** @brief Creates sparse array. + +The function allocates a multi-dimensional sparse array. Initially the array contain no elements, +that is PtrND and other related functions will return 0 for every index. +@param dims Number of array dimensions. In contrast to the dense matrix, the number of dimensions is +practically unlimited (up to \f$2^{16}\f$ ). +@param sizes Array of dimension sizes +@param type Type of array elements. The same as for CvMat + */ +CVAPI(CvSparseMat*) cvCreateSparseMat( int dims, const int* sizes, int type ); + +/** @brief Deallocates sparse array. + +The function releases the sparse array and clears the array pointer upon exit. +@param mat Double pointer to the array + */ +CVAPI(void) cvReleaseSparseMat( CvSparseMat** mat ); + +/** Creates a copy of CvSparseMat (except, may be, zero items) */ +CVAPI(CvSparseMat*) cvCloneSparseMat( const CvSparseMat* mat ); + +/** @brief Initializes sparse array elements iterator. + +The function initializes iterator of sparse array elements and returns pointer to the first element, +or NULL if the array is empty. +@param mat Input array +@param mat_iterator Initialized iterator + */ +CVAPI(CvSparseNode*) cvInitSparseMatIterator( const CvSparseMat* mat, + CvSparseMatIterator* mat_iterator ); + +/** @brief Returns the next sparse matrix element + +The function moves iterator to the next sparse matrix element and returns pointer to it. In the +current version there is no any particular order of the elements, because they are stored in the +hash table. The sample below demonstrates how to iterate through the sparse matrix: +@code + // print all the non-zero sparse matrix elements and compute their sum + double sum = 0; + int i, dims = cvGetDims(sparsemat); + CvSparseMatIterator it; + CvSparseNode* node = cvInitSparseMatIterator(sparsemat, &it); + + for(; node != 0; node = cvGetNextSparseNode(&it)) + { + int* idx = CV_NODE_IDX(array, node); + float val = *(float*)CV_NODE_VAL(array, node); + printf("M"); + for(i = 0; i < dims; i++ ) + printf("[%d]", idx[i]); + printf("=%g\n", val); + + sum += val; + } + + printf("nTotal sum = %g\n", sum); +@endcode +@param mat_iterator Sparse array iterator + */ +CV_INLINE CvSparseNode* cvGetNextSparseNode( CvSparseMatIterator* mat_iterator ) +{ + if( mat_iterator->node->next ) + return mat_iterator->node = mat_iterator->node->next; + else + { + int idx; + for( idx = ++mat_iterator->curidx; idx < mat_iterator->mat->hashsize; idx++ ) + { + CvSparseNode* node = (CvSparseNode*)mat_iterator->mat->hashtable[idx]; + if( node ) + { + mat_iterator->curidx = idx; + return mat_iterator->node = node; + } + } + return NULL; + } +} + + +#define CV_MAX_ARR 10 + +/** matrix iterator: used for n-ary operations on dense arrays */ +typedef struct CvNArrayIterator +{ + int count; /**< number of arrays */ + int dims; /**< number of dimensions to iterate */ + CvSize size; /**< maximal common linear size: { width = size, height = 1 } */ + uchar* ptr[CV_MAX_ARR]; /**< pointers to the array slices */ + int stack[CV_MAX_DIM]; /**< for internal use */ + CvMatND* hdr[CV_MAX_ARR]; /**< pointers to the headers of the + matrices that are processed */ +} +CvNArrayIterator; + +#define CV_NO_DEPTH_CHECK 1 +#define CV_NO_CN_CHECK 2 +#define CV_NO_SIZE_CHECK 4 + +/** initializes iterator that traverses through several arrays simultaneously + (the function together with cvNextArraySlice is used for + N-ari element-wise operations) */ +CVAPI(int) cvInitNArrayIterator( int count, CvArr** arrs, + const CvArr* mask, CvMatND* stubs, + CvNArrayIterator* array_iterator, + int flags CV_DEFAULT(0) ); + +/** returns zero value if iteration is finished, non-zero (slice length) otherwise */ +CVAPI(int) cvNextNArraySlice( CvNArrayIterator* array_iterator ); + + +/** @brief Returns type of array elements. + +The function returns type of the array elements. In the case of IplImage the type is converted to +CvMat-like representation. For example, if the image has been created as: +@code + IplImage* img = cvCreateImage(cvSize(640, 480), IPL_DEPTH_8U, 3); +@endcode +The code cvGetElemType(img) will return CV_8UC3. +@param arr Input array + */ +CVAPI(int) cvGetElemType( const CvArr* arr ); + +/** @brief Return number of array dimensions + +The function returns the array dimensionality and the array of dimension sizes. In the case of +IplImage or CvMat it always returns 2 regardless of number of image/matrix rows. For example, the +following code calculates total number of array elements: +@code + int sizes[CV_MAX_DIM]; + int i, total = 1; + int dims = cvGetDims(arr, size); + for(i = 0; i < dims; i++ ) + total *= sizes[i]; +@endcode +@param arr Input array +@param sizes Optional output vector of the array dimension sizes. For 2d arrays the number of rows +(height) goes first, number of columns (width) next. + */ +CVAPI(int) cvGetDims( const CvArr* arr, int* sizes CV_DEFAULT(NULL) ); + + +/** @brief Returns array size along the specified dimension. + +@param arr Input array +@param index Zero-based dimension index (for matrices 0 means number of rows, 1 means number of +columns; for images 0 means height, 1 means width) + */ +CVAPI(int) cvGetDimSize( const CvArr* arr, int index ); + + +/** @brief Return pointer to a particular array element. + +The functions return a pointer to a specific array element. Number of array dimension should match +to the number of indices passed to the function except for cvPtr1D function that can be used for +sequential access to 1D, 2D or nD dense arrays. + +The functions can be used for sparse arrays as well - if the requested node does not exist they +create it and set it to zero. + +All these as well as other functions accessing array elements ( cvGetND , cvGetRealND , cvSet +, cvSetND , cvSetRealND ) raise an error in case if the element index is out of range. +@param arr Input array +@param idx0 The first zero-based component of the element index +@param type Optional output parameter: type of matrix elements + */ +CVAPI(uchar*) cvPtr1D( const CvArr* arr, int idx0, int* type CV_DEFAULT(NULL)); +/** @overload */ +CVAPI(uchar*) cvPtr2D( const CvArr* arr, int idx0, int idx1, int* type CV_DEFAULT(NULL) ); +/** @overload */ +CVAPI(uchar*) cvPtr3D( const CvArr* arr, int idx0, int idx1, int idx2, + int* type CV_DEFAULT(NULL)); +/** @overload +@param arr Input array +@param idx Array of the element indices +@param type Optional output parameter: type of matrix elements +@param create_node Optional input parameter for sparse matrices. Non-zero value of the parameter +means that the requested element is created if it does not exist already. +@param precalc_hashval Optional input parameter for sparse matrices. If the pointer is not NULL, +the function does not recalculate the node hash value, but takes it from the specified location. +It is useful for speeding up pair-wise operations (TODO: provide an example) +*/ +CVAPI(uchar*) cvPtrND( const CvArr* arr, const int* idx, int* type CV_DEFAULT(NULL), + int create_node CV_DEFAULT(1), + unsigned* precalc_hashval CV_DEFAULT(NULL)); + +/** @brief Return a specific array element. + +The functions return a specific array element. In the case of a sparse array the functions return 0 +if the requested node does not exist (no new node is created by the functions). +@param arr Input array +@param idx0 The first zero-based component of the element index + */ +CVAPI(CvScalar) cvGet1D( const CvArr* arr, int idx0 ); +/** @overload */ +CVAPI(CvScalar) cvGet2D( const CvArr* arr, int idx0, int idx1 ); +/** @overload */ +CVAPI(CvScalar) cvGet3D( const CvArr* arr, int idx0, int idx1, int idx2 ); +/** @overload +@param arr Input array +@param idx Array of the element indices +*/ +CVAPI(CvScalar) cvGetND( const CvArr* arr, const int* idx ); + +/** @brief Return a specific element of single-channel 1D, 2D, 3D or nD array. + +Returns a specific element of a single-channel array. If the array has multiple channels, a runtime +error is raised. Note that Get?D functions can be used safely for both single-channel and +multiple-channel arrays though they are a bit slower. + +In the case of a sparse array the functions return 0 if the requested node does not exist (no new +node is created by the functions). +@param arr Input array. Must have a single channel. +@param idx0 The first zero-based component of the element index + */ +CVAPI(double) cvGetReal1D( const CvArr* arr, int idx0 ); +/** @overload */ +CVAPI(double) cvGetReal2D( const CvArr* arr, int idx0, int idx1 ); +/** @overload */ +CVAPI(double) cvGetReal3D( const CvArr* arr, int idx0, int idx1, int idx2 ); +/** @overload +@param arr Input array. Must have a single channel. +@param idx Array of the element indices +*/ +CVAPI(double) cvGetRealND( const CvArr* arr, const int* idx ); + +/** @brief Change the particular array element. + +The functions assign the new value to a particular array element. In the case of a sparse array the +functions create the node if it does not exist yet. +@param arr Input array +@param idx0 The first zero-based component of the element index +@param value The assigned value + */ +CVAPI(void) cvSet1D( CvArr* arr, int idx0, CvScalar value ); +/** @overload */ +CVAPI(void) cvSet2D( CvArr* arr, int idx0, int idx1, CvScalar value ); +/** @overload */ +CVAPI(void) cvSet3D( CvArr* arr, int idx0, int idx1, int idx2, CvScalar value ); +/** @overload +@param arr Input array +@param idx Array of the element indices +@param value The assigned value +*/ +CVAPI(void) cvSetND( CvArr* arr, const int* idx, CvScalar value ); + +/** @brief Change a specific array element. + +The functions assign a new value to a specific element of a single-channel array. If the array has +multiple channels, a runtime error is raised. Note that the Set\*D function can be used safely for +both single-channel and multiple-channel arrays, though they are a bit slower. + +In the case of a sparse array the functions create the node if it does not yet exist. +@param arr Input array +@param idx0 The first zero-based component of the element index +@param value The assigned value + */ +CVAPI(void) cvSetReal1D( CvArr* arr, int idx0, double value ); +/** @overload */ +CVAPI(void) cvSetReal2D( CvArr* arr, int idx0, int idx1, double value ); +/** @overload */ +CVAPI(void) cvSetReal3D( CvArr* arr, int idx0, + int idx1, int idx2, double value ); +/** @overload +@param arr Input array +@param idx Array of the element indices +@param value The assigned value +*/ +CVAPI(void) cvSetRealND( CvArr* arr, const int* idx, double value ); + +/** clears element of ND dense array, + in case of sparse arrays it deletes the specified node */ +CVAPI(void) cvClearND( CvArr* arr, const int* idx ); + +/** @brief Returns matrix header for arbitrary array. + +The function returns a matrix header for the input array that can be a matrix - CvMat, an image - +IplImage, or a multi-dimensional dense array - CvMatND (the third option is allowed only if +allowND != 0) . In the case of matrix the function simply returns the input pointer. In the case of +IplImage\* or CvMatND it initializes the header structure with parameters of the current image ROI +and returns &header. Because COI is not supported by CvMat, it is returned separately. + +The function provides an easy way to handle both types of arrays - IplImage and CvMat using the same +code. Input array must have non-zero data pointer, otherwise the function will report an error. + +@note If the input array is IplImage with planar data layout and COI set, the function returns the +pointer to the selected plane and COI == 0. This feature allows user to process IplImage structures +with planar data layout, even though OpenCV does not support such images. +@param arr Input array +@param header Pointer to CvMat structure used as a temporary buffer +@param coi Optional output parameter for storing COI +@param allowND If non-zero, the function accepts multi-dimensional dense arrays (CvMatND\*) and +returns 2D matrix (if CvMatND has two dimensions) or 1D matrix (when CvMatND has 1 dimension or +more than 2 dimensions). The CvMatND array must be continuous. +@sa cvGetImage, cvarrToMat. + */ +CVAPI(CvMat*) cvGetMat( const CvArr* arr, CvMat* header, + int* coi CV_DEFAULT(NULL), + int allowND CV_DEFAULT(0)); + +/** @brief Returns image header for arbitrary array. + +The function returns the image header for the input array that can be a matrix (CvMat) or image +(IplImage). In the case of an image the function simply returns the input pointer. In the case of +CvMat it initializes an image_header structure with the parameters of the input matrix. Note that +if we transform IplImage to CvMat using cvGetMat and then transform CvMat back to IplImage using +this function, we will get different headers if the ROI is set in the original image. +@param arr Input array +@param image_header Pointer to IplImage structure used as a temporary buffer + */ +CVAPI(IplImage*) cvGetImage( const CvArr* arr, IplImage* image_header ); + + +/** @brief Changes the shape of a multi-dimensional array without copying the data. + +The function is an advanced version of cvReshape that can work with multi-dimensional arrays as +well (though it can work with ordinary images and matrices) and change the number of dimensions. + +Below are the two samples from the cvReshape description rewritten using cvReshapeMatND: +@code + IplImage* color_img = cvCreateImage(cvSize(320,240), IPL_DEPTH_8U, 3); + IplImage gray_img_hdr, *gray_img; + gray_img = (IplImage*)cvReshapeMatND(color_img, sizeof(gray_img_hdr), &gray_img_hdr, 1, 0, 0); + ... + int size[] = { 2, 2, 2 }; + CvMatND* mat = cvCreateMatND(3, size, CV_32F); + CvMat row_header, *row; + row = (CvMat*)cvReshapeMatND(mat, sizeof(row_header), &row_header, 0, 1, 0); +@endcode +In C, the header file for this function includes a convenient macro cvReshapeND that does away with +the sizeof_header parameter. So, the lines containing the call to cvReshapeMatND in the examples +may be replaced as follow: +@code + gray_img = (IplImage*)cvReshapeND(color_img, &gray_img_hdr, 1, 0, 0); + ... + row = (CvMat*)cvReshapeND(mat, &row_header, 0, 1, 0); +@endcode +@param arr Input array +@param sizeof_header Size of output header to distinguish between IplImage, CvMat and CvMatND +output headers +@param header Output header to be filled +@param new_cn New number of channels. new_cn = 0 means that the number of channels remains +unchanged. +@param new_dims New number of dimensions. new_dims = 0 means that the number of dimensions +remains the same. +@param new_sizes Array of new dimension sizes. Only new_dims-1 values are used, because the +total number of elements must remain the same. Thus, if new_dims = 1, new_sizes array is not +used. + */ +CVAPI(CvArr*) cvReshapeMatND( const CvArr* arr, + int sizeof_header, CvArr* header, + int new_cn, int new_dims, int* new_sizes ); + +#define cvReshapeND( arr, header, new_cn, new_dims, new_sizes ) \ + cvReshapeMatND( (arr), sizeof(*(header)), (header), \ + (new_cn), (new_dims), (new_sizes)) + +/** @brief Changes shape of matrix/image without copying data. + +The function initializes the CvMat header so that it points to the same data as the original array +but has a different shape - different number of channels, different number of rows, or both. + +The following example code creates one image buffer and two image headers, the first is for a +320x240x3 image and the second is for a 960x240x1 image: +@code + IplImage* color_img = cvCreateImage(cvSize(320,240), IPL_DEPTH_8U, 3); + CvMat gray_mat_hdr; + IplImage gray_img_hdr, *gray_img; + cvReshape(color_img, &gray_mat_hdr, 1); + gray_img = cvGetImage(&gray_mat_hdr, &gray_img_hdr); +@endcode +And the next example converts a 3x3 matrix to a single 1x9 vector: +@code + CvMat* mat = cvCreateMat(3, 3, CV_32F); + CvMat row_header, *row; + row = cvReshape(mat, &row_header, 0, 1); +@endcode +@param arr Input array +@param header Output header to be filled +@param new_cn New number of channels. 'new_cn = 0' means that the number of channels remains +unchanged. +@param new_rows New number of rows. 'new_rows = 0' means that the number of rows remains +unchanged unless it needs to be changed according to new_cn value. +*/ +CVAPI(CvMat*) cvReshape( const CvArr* arr, CvMat* header, + int new_cn, int new_rows CV_DEFAULT(0) ); + +/** Repeats source 2d array several times in both horizontal and + vertical direction to fill destination array */ +CVAPI(void) cvRepeat( const CvArr* src, CvArr* dst ); + +/** @brief Allocates array data + +The function allocates image, matrix or multi-dimensional dense array data. Note that in the case of +matrix types OpenCV allocation functions are used. In the case of IplImage they are used unless +CV_TURN_ON_IPL_COMPATIBILITY() has been called before. In the latter case IPL functions are used +to allocate the data. +@param arr Array header + */ +CVAPI(void) cvCreateData( CvArr* arr ); + +/** @brief Releases array data. + +The function releases the array data. In the case of CvMat or CvMatND it simply calls +cvDecRefData(), that is the function can not deallocate external data. See also the note to +cvCreateData . +@param arr Array header + */ +CVAPI(void) cvReleaseData( CvArr* arr ); + +/** @brief Assigns user data to the array header. + +The function assigns user data to the array header. Header should be initialized before using +cvCreateMatHeader, cvCreateImageHeader, cvCreateMatNDHeader, cvInitMatHeader, +cvInitImageHeader or cvInitMatNDHeader. +@param arr Array header +@param data User data +@param step Full row length in bytes + */ +CVAPI(void) cvSetData( CvArr* arr, void* data, int step ); + +/** @brief Retrieves low-level information about the array. + +The function fills output variables with low-level information about the array data. All output + +parameters are optional, so some of the pointers may be set to NULL. If the array is IplImage with +ROI set, the parameters of ROI are returned. + +The following example shows how to get access to array elements. It computes absolute values of the +array elements : +@code + float* data; + int step; + CvSize size; + + cvGetRawData(array, (uchar**)&data, &step, &size); + step /= sizeof(data[0]); + + for(int y = 0; y < size.height; y++, data += step ) + for(int x = 0; x < size.width; x++ ) + data[x] = (float)fabs(data[x]); +@endcode +@param arr Array header +@param data Output pointer to the whole image origin or ROI origin if ROI is set +@param step Output full row length in bytes +@param roi_size Output ROI size + */ +CVAPI(void) cvGetRawData( const CvArr* arr, uchar** data, + int* step CV_DEFAULT(NULL), + CvSize* roi_size CV_DEFAULT(NULL)); + +/** @brief Returns size of matrix or image ROI. + +The function returns number of rows (CvSize::height) and number of columns (CvSize::width) of the +input matrix or image. In the case of image the size of ROI is returned. +@param arr array header + */ +CVAPI(CvSize) cvGetSize( const CvArr* arr ); + +/** @brief Copies one array to another. + +The function copies selected elements from an input array to an output array: + +\f[\texttt{dst} (I)= \texttt{src} (I) \quad \text{if} \quad \texttt{mask} (I) \ne 0.\f] + +If any of the passed arrays is of IplImage type, then its ROI and COI fields are used. Both arrays +must have the same type, the same number of dimensions, and the same size. The function can also +copy sparse arrays (mask is not supported in this case). +@param src The source array +@param dst The destination array +@param mask Operation mask, 8-bit single channel array; specifies elements of the destination array +to be changed + */ +CVAPI(void) cvCopy( const CvArr* src, CvArr* dst, + const CvArr* mask CV_DEFAULT(NULL) ); + +/** @brief Sets every element of an array to a given value. + +The function copies the scalar value to every selected element of the destination array: +\f[\texttt{arr} (I)= \texttt{value} \quad \text{if} \quad \texttt{mask} (I) \ne 0\f] +If array arr is of IplImage type, then is ROI used, but COI must not be set. +@param arr The destination array +@param value Fill value +@param mask Operation mask, 8-bit single channel array; specifies elements of the destination +array to be changed + */ +CVAPI(void) cvSet( CvArr* arr, CvScalar value, + const CvArr* mask CV_DEFAULT(NULL) ); + +/** @brief Clears the array. + +The function clears the array. In the case of dense arrays (CvMat, CvMatND or IplImage), +cvZero(array) is equivalent to cvSet(array,cvScalarAll(0),0). In the case of sparse arrays all the +elements are removed. +@param arr Array to be cleared + */ +CVAPI(void) cvSetZero( CvArr* arr ); +#define cvZero cvSetZero + + +/** Splits a multi-channel array into the set of single-channel arrays or + extracts particular [color] plane */ +CVAPI(void) cvSplit( const CvArr* src, CvArr* dst0, CvArr* dst1, + CvArr* dst2, CvArr* dst3 ); + +/** Merges a set of single-channel arrays into the single multi-channel array + or inserts one particular [color] plane to the array */ +CVAPI(void) cvMerge( const CvArr* src0, const CvArr* src1, + const CvArr* src2, const CvArr* src3, + CvArr* dst ); + +/** Copies several channels from input arrays to + certain channels of output arrays */ +CVAPI(void) cvMixChannels( const CvArr** src, int src_count, + CvArr** dst, int dst_count, + const int* from_to, int pair_count ); + +/** @brief Converts one array to another with optional linear transformation. + +The function has several different purposes, and thus has several different names. It copies one +array to another with optional scaling, which is performed first, and/or optional type conversion, +performed after: + +\f[\texttt{dst} (I) = \texttt{scale} \texttt{src} (I) + ( \texttt{shift} _0, \texttt{shift} _1,...)\f] + +All the channels of multi-channel arrays are processed independently. + +The type of conversion is done with rounding and saturation, that is if the result of scaling + +conversion can not be represented exactly by a value of the destination array element type, it is +set to the nearest representable value on the real axis. +@param src Source array +@param dst Destination array +@param scale Scale factor +@param shift Value added to the scaled source array elements + */ +CVAPI(void) cvConvertScale( const CvArr* src, CvArr* dst, + double scale CV_DEFAULT(1), + double shift CV_DEFAULT(0) ); +#define cvCvtScale cvConvertScale +#define cvScale cvConvertScale +#define cvConvert( src, dst ) cvConvertScale( (src), (dst), 1, 0 ) + + +/** Performs linear transformation on every source array element, + stores absolute value of the result: + dst(x,y,c) = abs(scale*src(x,y,c)+shift). + destination array must have 8u type. + In other cases one may use cvConvertScale + cvAbsDiffS */ +CVAPI(void) cvConvertScaleAbs( const CvArr* src, CvArr* dst, + double scale CV_DEFAULT(1), + double shift CV_DEFAULT(0) ); +#define cvCvtScaleAbs cvConvertScaleAbs + + +/** checks termination criteria validity and + sets eps to default_eps (if it is not set), + max_iter to default_max_iters (if it is not set) +*/ +CVAPI(CvTermCriteria) cvCheckTermCriteria( CvTermCriteria criteria, + double default_eps, + int default_max_iters ); + +/****************************************************************************************\ +* Arithmetic, logic and comparison operations * +\****************************************************************************************/ + +/** dst(mask) = src1(mask) + src2(mask) */ +CVAPI(void) cvAdd( const CvArr* src1, const CvArr* src2, CvArr* dst, + const CvArr* mask CV_DEFAULT(NULL)); + +/** dst(mask) = src(mask) + value */ +CVAPI(void) cvAddS( const CvArr* src, CvScalar value, CvArr* dst, + const CvArr* mask CV_DEFAULT(NULL)); + +/** dst(mask) = src1(mask) - src2(mask) */ +CVAPI(void) cvSub( const CvArr* src1, const CvArr* src2, CvArr* dst, + const CvArr* mask CV_DEFAULT(NULL)); + +/** dst(mask) = src(mask) - value = src(mask) + (-value) */ +CV_INLINE void cvSubS( const CvArr* src, CvScalar value, CvArr* dst, + const CvArr* mask CV_DEFAULT(NULL)) +{ + cvAddS( src, cvScalar( -value.val[0], -value.val[1], -value.val[2], -value.val[3]), + dst, mask ); +} + +/** dst(mask) = value - src(mask) */ +CVAPI(void) cvSubRS( const CvArr* src, CvScalar value, CvArr* dst, + const CvArr* mask CV_DEFAULT(NULL)); + +/** dst(idx) = src1(idx) * src2(idx) * scale + (scaled element-wise multiplication of 2 arrays) */ +CVAPI(void) cvMul( const CvArr* src1, const CvArr* src2, + CvArr* dst, double scale CV_DEFAULT(1) ); + +/** element-wise division/inversion with scaling: + dst(idx) = src1(idx) * scale / src2(idx) + or dst(idx) = scale / src2(idx) if src1 == 0 */ +CVAPI(void) cvDiv( const CvArr* src1, const CvArr* src2, + CvArr* dst, double scale CV_DEFAULT(1)); + +/** dst = src1 * scale + src2 */ +CVAPI(void) cvScaleAdd( const CvArr* src1, CvScalar scale, + const CvArr* src2, CvArr* dst ); +#define cvAXPY( A, real_scalar, B, C ) cvScaleAdd(A, cvRealScalar(real_scalar), B, C) + +/** dst = src1 * alpha + src2 * beta + gamma */ +CVAPI(void) cvAddWeighted( const CvArr* src1, double alpha, + const CvArr* src2, double beta, + double gamma, CvArr* dst ); + +/** @brief Calculates the dot product of two arrays in Euclidean metrics. + +The function calculates and returns the Euclidean dot product of two arrays. + +\f[src1 \bullet src2 = \sum _I ( \texttt{src1} (I) \texttt{src2} (I))\f] + +In the case of multiple channel arrays, the results for all channels are accumulated. In particular, +cvDotProduct(a,a) where a is a complex vector, will return \f$||\texttt{a}||^2\f$. The function can +process multi-dimensional arrays, row by row, layer by layer, and so on. +@param src1 The first source array +@param src2 The second source array + */ +CVAPI(double) cvDotProduct( const CvArr* src1, const CvArr* src2 ); + +/** dst(idx) = src1(idx) & src2(idx) */ +CVAPI(void) cvAnd( const CvArr* src1, const CvArr* src2, + CvArr* dst, const CvArr* mask CV_DEFAULT(NULL)); + +/** dst(idx) = src(idx) & value */ +CVAPI(void) cvAndS( const CvArr* src, CvScalar value, + CvArr* dst, const CvArr* mask CV_DEFAULT(NULL)); + +/** dst(idx) = src1(idx) | src2(idx) */ +CVAPI(void) cvOr( const CvArr* src1, const CvArr* src2, + CvArr* dst, const CvArr* mask CV_DEFAULT(NULL)); + +/** dst(idx) = src(idx) | value */ +CVAPI(void) cvOrS( const CvArr* src, CvScalar value, + CvArr* dst, const CvArr* mask CV_DEFAULT(NULL)); + +/** dst(idx) = src1(idx) ^ src2(idx) */ +CVAPI(void) cvXor( const CvArr* src1, const CvArr* src2, + CvArr* dst, const CvArr* mask CV_DEFAULT(NULL)); + +/** dst(idx) = src(idx) ^ value */ +CVAPI(void) cvXorS( const CvArr* src, CvScalar value, + CvArr* dst, const CvArr* mask CV_DEFAULT(NULL)); + +/** dst(idx) = ~src(idx) */ +CVAPI(void) cvNot( const CvArr* src, CvArr* dst ); + +/** dst(idx) = lower(idx) <= src(idx) < upper(idx) */ +CVAPI(void) cvInRange( const CvArr* src, const CvArr* lower, + const CvArr* upper, CvArr* dst ); + +/** dst(idx) = lower <= src(idx) < upper */ +CVAPI(void) cvInRangeS( const CvArr* src, CvScalar lower, + CvScalar upper, CvArr* dst ); + +#define CV_CMP_EQ 0 +#define CV_CMP_GT 1 +#define CV_CMP_GE 2 +#define CV_CMP_LT 3 +#define CV_CMP_LE 4 +#define CV_CMP_NE 5 + +/** The comparison operation support single-channel arrays only. + Destination image should be 8uC1 or 8sC1 */ + +/** dst(idx) = src1(idx) _cmp_op_ src2(idx) */ +CVAPI(void) cvCmp( const CvArr* src1, const CvArr* src2, CvArr* dst, int cmp_op ); + +/** dst(idx) = src1(idx) _cmp_op_ value */ +CVAPI(void) cvCmpS( const CvArr* src, double value, CvArr* dst, int cmp_op ); + +/** dst(idx) = min(src1(idx),src2(idx)) */ +CVAPI(void) cvMin( const CvArr* src1, const CvArr* src2, CvArr* dst ); + +/** dst(idx) = max(src1(idx),src2(idx)) */ +CVAPI(void) cvMax( const CvArr* src1, const CvArr* src2, CvArr* dst ); + +/** dst(idx) = min(src(idx),value) */ +CVAPI(void) cvMinS( const CvArr* src, double value, CvArr* dst ); + +/** dst(idx) = max(src(idx),value) */ +CVAPI(void) cvMaxS( const CvArr* src, double value, CvArr* dst ); + +/** dst(x,y,c) = abs(src1(x,y,c) - src2(x,y,c)) */ +CVAPI(void) cvAbsDiff( const CvArr* src1, const CvArr* src2, CvArr* dst ); + +/** dst(x,y,c) = abs(src(x,y,c) - value(c)) */ +CVAPI(void) cvAbsDiffS( const CvArr* src, CvArr* dst, CvScalar value ); +#define cvAbs( src, dst ) cvAbsDiffS( (src), (dst), cvScalarAll(0)) + +/****************************************************************************************\ +* Math operations * +\****************************************************************************************/ + +/** Does cartesian->polar coordinates conversion. + Either of output components (magnitude or angle) is optional */ +CVAPI(void) cvCartToPolar( const CvArr* x, const CvArr* y, + CvArr* magnitude, CvArr* angle CV_DEFAULT(NULL), + int angle_in_degrees CV_DEFAULT(0)); + +/** Does polar->cartesian coordinates conversion. + Either of output components (magnitude or angle) is optional. + If magnitude is missing it is assumed to be all 1's */ +CVAPI(void) cvPolarToCart( const CvArr* magnitude, const CvArr* angle, + CvArr* x, CvArr* y, + int angle_in_degrees CV_DEFAULT(0)); + +/** Does powering: dst(idx) = src(idx)^power */ +CVAPI(void) cvPow( const CvArr* src, CvArr* dst, double power ); + +/** Does exponention: dst(idx) = exp(src(idx)). + Overflow is not handled yet. Underflow is handled. + Maximal relative error is ~7e-6 for single-precision input */ +CVAPI(void) cvExp( const CvArr* src, CvArr* dst ); + +/** Calculates natural logarithms: dst(idx) = log(abs(src(idx))). + Logarithm of 0 gives large negative number(~-700) + Maximal relative error is ~3e-7 for single-precision output +*/ +CVAPI(void) cvLog( const CvArr* src, CvArr* dst ); + +/** Fast arctangent calculation */ +CVAPI(float) cvFastArctan( float y, float x ); + +/** Fast cubic root calculation */ +CVAPI(float) cvCbrt( float value ); + +#define CV_CHECK_RANGE 1 +#define CV_CHECK_QUIET 2 +/** Checks array values for NaNs, Infs or simply for too large numbers + (if CV_CHECK_RANGE is set). If CV_CHECK_QUIET is set, + no runtime errors is raised (function returns zero value in case of "bad" values). + Otherwise cvError is called */ +CVAPI(int) cvCheckArr( const CvArr* arr, int flags CV_DEFAULT(0), + double min_val CV_DEFAULT(0), double max_val CV_DEFAULT(0)); +#define cvCheckArray cvCheckArr + +#define CV_RAND_UNI 0 +#define CV_RAND_NORMAL 1 + +/** @brief Fills an array with random numbers and updates the RNG state. + +The function fills the destination array with uniformly or normally distributed random numbers. +@param rng CvRNG state initialized by cvRNG +@param arr The destination array +@param dist_type Distribution type +> - **CV_RAND_UNI** uniform distribution +> - **CV_RAND_NORMAL** normal or Gaussian distribution +@param param1 The first parameter of the distribution. In the case of a uniform distribution it is +the inclusive lower boundary of the random numbers range. In the case of a normal distribution it +is the mean value of the random numbers. +@param param2 The second parameter of the distribution. In the case of a uniform distribution it +is the exclusive upper boundary of the random numbers range. In the case of a normal distribution +it is the standard deviation of the random numbers. +@sa randu, randn, RNG::fill. + */ +CVAPI(void) cvRandArr( CvRNG* rng, CvArr* arr, int dist_type, + CvScalar param1, CvScalar param2 ); + +CVAPI(void) cvRandShuffle( CvArr* mat, CvRNG* rng, + double iter_factor CV_DEFAULT(1.)); + +#define CV_SORT_EVERY_ROW 0 +#define CV_SORT_EVERY_COLUMN 1 +#define CV_SORT_ASCENDING 0 +#define CV_SORT_DESCENDING 16 + +CVAPI(void) cvSort( const CvArr* src, CvArr* dst CV_DEFAULT(NULL), + CvArr* idxmat CV_DEFAULT(NULL), + int flags CV_DEFAULT(0)); + +/** Finds real roots of a cubic equation */ +CVAPI(int) cvSolveCubic( const CvMat* coeffs, CvMat* roots ); + +/** Finds all real and complex roots of a polynomial equation */ +CVAPI(void) cvSolvePoly(const CvMat* coeffs, CvMat *roots2, + int maxiter CV_DEFAULT(20), int fig CV_DEFAULT(100)); + +/****************************************************************************************\ +* Matrix operations * +\****************************************************************************************/ + +/** @brief Calculates the cross product of two 3D vectors. + +The function calculates the cross product of two 3D vectors: +\f[\texttt{dst} = \texttt{src1} \times \texttt{src2}\f] +or: +\f[\begin{array}{l} \texttt{dst} _1 = \texttt{src1} _2 \texttt{src2} _3 - \texttt{src1} _3 \texttt{src2} _2 \\ \texttt{dst} _2 = \texttt{src1} _3 \texttt{src2} _1 - \texttt{src1} _1 \texttt{src2} _3 \\ \texttt{dst} _3 = \texttt{src1} _1 \texttt{src2} _2 - \texttt{src1} _2 \texttt{src2} _1 \end{array}\f] +@param src1 The first source vector +@param src2 The second source vector +@param dst The destination vector + */ +CVAPI(void) cvCrossProduct( const CvArr* src1, const CvArr* src2, CvArr* dst ); + +/** Matrix transform: dst = A*B + C, C is optional */ +#define cvMatMulAdd( src1, src2, src3, dst ) cvGEMM( (src1), (src2), 1., (src3), 1., (dst), 0 ) +#define cvMatMul( src1, src2, dst ) cvMatMulAdd( (src1), (src2), NULL, (dst)) + +#define CV_GEMM_A_T 1 +#define CV_GEMM_B_T 2 +#define CV_GEMM_C_T 4 +/** Extended matrix transform: + dst = alpha*op(A)*op(B) + beta*op(C), where op(X) is X or X^T */ +CVAPI(void) cvGEMM( const CvArr* src1, const CvArr* src2, double alpha, + const CvArr* src3, double beta, CvArr* dst, + int tABC CV_DEFAULT(0)); +#define cvMatMulAddEx cvGEMM + +/** Transforms each element of source array and stores + resultant vectors in destination array */ +CVAPI(void) cvTransform( const CvArr* src, CvArr* dst, + const CvMat* transmat, + const CvMat* shiftvec CV_DEFAULT(NULL)); +#define cvMatMulAddS cvTransform + +/** Does perspective transform on every element of input array */ +CVAPI(void) cvPerspectiveTransform( const CvArr* src, CvArr* dst, + const CvMat* mat ); + +/** Calculates (A-delta)*(A-delta)^T (order=0) or (A-delta)^T*(A-delta) (order=1) */ +CVAPI(void) cvMulTransposed( const CvArr* src, CvArr* dst, int order, + const CvArr* delta CV_DEFAULT(NULL), + double scale CV_DEFAULT(1.) ); + +/** Transposes matrix. Square matrices can be transposed in-place */ +CVAPI(void) cvTranspose( const CvArr* src, CvArr* dst ); +#define cvT cvTranspose + +/** Completes the symmetric matrix from the lower (LtoR=0) or from the upper (LtoR!=0) part */ +CVAPI(void) cvCompleteSymm( CvMat* matrix, int LtoR CV_DEFAULT(0) ); + +/** Mirror array data around horizontal (flip=0), + vertical (flip=1) or both(flip=-1) axises: + cvFlip(src) flips images vertically and sequences horizontally (inplace) */ +CVAPI(void) cvFlip( const CvArr* src, CvArr* dst CV_DEFAULT(NULL), + int flip_mode CV_DEFAULT(0)); +#define cvMirror cvFlip + + +#define CV_SVD_MODIFY_A 1 +#define CV_SVD_U_T 2 +#define CV_SVD_V_T 4 + +/** Performs Singular Value Decomposition of a matrix */ +CVAPI(void) cvSVD( CvArr* A, CvArr* W, CvArr* U CV_DEFAULT(NULL), + CvArr* V CV_DEFAULT(NULL), int flags CV_DEFAULT(0)); + +/** Performs Singular Value Back Substitution (solves A*X = B): + flags must be the same as in cvSVD */ +CVAPI(void) cvSVBkSb( const CvArr* W, const CvArr* U, + const CvArr* V, const CvArr* B, + CvArr* X, int flags ); + +#define CV_LU 0 +#define CV_SVD 1 +#define CV_SVD_SYM 2 +#define CV_CHOLESKY 3 +#define CV_QR 4 +#define CV_NORMAL 16 + +/** Inverts matrix */ +CVAPI(double) cvInvert( const CvArr* src, CvArr* dst, + int method CV_DEFAULT(CV_LU)); +#define cvInv cvInvert + +/** Solves linear system (src1)*(dst) = (src2) + (returns 0 if src1 is a singular and CV_LU method is used) */ +CVAPI(int) cvSolve( const CvArr* src1, const CvArr* src2, CvArr* dst, + int method CV_DEFAULT(CV_LU)); + +/** Calculates determinant of input matrix */ +CVAPI(double) cvDet( const CvArr* mat ); + +/** Calculates trace of the matrix (sum of elements on the main diagonal) */ +CVAPI(CvScalar) cvTrace( const CvArr* mat ); + +/** Finds eigen values and vectors of a symmetric matrix */ +CVAPI(void) cvEigenVV( CvArr* mat, CvArr* evects, CvArr* evals, + double eps CV_DEFAULT(0), + int lowindex CV_DEFAULT(-1), + int highindex CV_DEFAULT(-1)); + +///* Finds selected eigen values and vectors of a symmetric matrix */ +//CVAPI(void) cvSelectedEigenVV( CvArr* mat, CvArr* evects, CvArr* evals, +// int lowindex, int highindex ); + +/** Makes an identity matrix (mat_ij = i == j) */ +CVAPI(void) cvSetIdentity( CvArr* mat, CvScalar value CV_DEFAULT(cvRealScalar(1)) ); + +/** Fills matrix with given range of numbers */ +CVAPI(CvArr*) cvRange( CvArr* mat, double start, double end ); + +/** @anchor core_c_CovarFlags +@name Flags for cvCalcCovarMatrix +@see cvCalcCovarMatrix + @{ +*/ + +/** flag for cvCalcCovarMatrix, transpose([v1-avg, v2-avg,...]) * [v1-avg,v2-avg,...] */ +#define CV_COVAR_SCRAMBLED 0 + +/** flag for cvCalcCovarMatrix, [v1-avg, v2-avg,...] * transpose([v1-avg,v2-avg,...]) */ +#define CV_COVAR_NORMAL 1 + +/** flag for cvCalcCovarMatrix, do not calc average (i.e. mean vector) - use the input vector instead + (useful for calculating covariance matrix by parts) */ +#define CV_COVAR_USE_AVG 2 + +/** flag for cvCalcCovarMatrix, scale the covariance matrix coefficients by number of the vectors */ +#define CV_COVAR_SCALE 4 + +/** flag for cvCalcCovarMatrix, all the input vectors are stored in a single matrix, as its rows */ +#define CV_COVAR_ROWS 8 + +/** flag for cvCalcCovarMatrix, all the input vectors are stored in a single matrix, as its columns */ +#define CV_COVAR_COLS 16 + +/** @} */ + +/** Calculates covariation matrix for a set of vectors +@see @ref core_c_CovarFlags "flags" +*/ +CVAPI(void) cvCalcCovarMatrix( const CvArr** vects, int count, + CvArr* cov_mat, CvArr* avg, int flags ); + +#define CV_PCA_DATA_AS_ROW 0 +#define CV_PCA_DATA_AS_COL 1 +#define CV_PCA_USE_AVG 2 +CVAPI(void) cvCalcPCA( const CvArr* data, CvArr* mean, + CvArr* eigenvals, CvArr* eigenvects, int flags ); + +CVAPI(void) cvProjectPCA( const CvArr* data, const CvArr* mean, + const CvArr* eigenvects, CvArr* result ); + +CVAPI(void) cvBackProjectPCA( const CvArr* proj, const CvArr* mean, + const CvArr* eigenvects, CvArr* result ); + +/** Calculates Mahalanobis(weighted) distance */ +CVAPI(double) cvMahalanobis( const CvArr* vec1, const CvArr* vec2, const CvArr* mat ); +#define cvMahalonobis cvMahalanobis + +/****************************************************************************************\ +* Array Statistics * +\****************************************************************************************/ + +/** Finds sum of array elements */ +CVAPI(CvScalar) cvSum( const CvArr* arr ); + +/** Calculates number of non-zero pixels */ +CVAPI(int) cvCountNonZero( const CvArr* arr ); + +/** Calculates mean value of array elements */ +CVAPI(CvScalar) cvAvg( const CvArr* arr, const CvArr* mask CV_DEFAULT(NULL) ); + +/** Calculates mean and standard deviation of pixel values */ +CVAPI(void) cvAvgSdv( const CvArr* arr, CvScalar* mean, CvScalar* std_dev, + const CvArr* mask CV_DEFAULT(NULL) ); + +/** Finds global minimum, maximum and their positions */ +CVAPI(void) cvMinMaxLoc( const CvArr* arr, double* min_val, double* max_val, + CvPoint* min_loc CV_DEFAULT(NULL), + CvPoint* max_loc CV_DEFAULT(NULL), + const CvArr* mask CV_DEFAULT(NULL) ); + +/** @anchor core_c_NormFlags + @name Flags for cvNorm and cvNormalize + @{ +*/ +#define CV_C 1 +#define CV_L1 2 +#define CV_L2 4 +#define CV_NORM_MASK 7 +#define CV_RELATIVE 8 +#define CV_DIFF 16 +#define CV_MINMAX 32 + +#define CV_DIFF_C (CV_DIFF | CV_C) +#define CV_DIFF_L1 (CV_DIFF | CV_L1) +#define CV_DIFF_L2 (CV_DIFF | CV_L2) +#define CV_RELATIVE_C (CV_RELATIVE | CV_C) +#define CV_RELATIVE_L1 (CV_RELATIVE | CV_L1) +#define CV_RELATIVE_L2 (CV_RELATIVE | CV_L2) +/** @} */ + +/** Finds norm, difference norm or relative difference norm for an array (or two arrays) +@see ref core_c_NormFlags "flags" +*/ +CVAPI(double) cvNorm( const CvArr* arr1, const CvArr* arr2 CV_DEFAULT(NULL), + int norm_type CV_DEFAULT(CV_L2), + const CvArr* mask CV_DEFAULT(NULL) ); + +/** @see ref core_c_NormFlags "flags" */ +CVAPI(void) cvNormalize( const CvArr* src, CvArr* dst, + double a CV_DEFAULT(1.), double b CV_DEFAULT(0.), + int norm_type CV_DEFAULT(CV_L2), + const CvArr* mask CV_DEFAULT(NULL) ); + +/** @anchor core_c_ReduceFlags + @name Flags for cvReduce + @{ +*/ +#define CV_REDUCE_SUM 0 +#define CV_REDUCE_AVG 1 +#define CV_REDUCE_MAX 2 +#define CV_REDUCE_MIN 3 +/** @} */ + +/** @see @ref core_c_ReduceFlags "flags" */ +CVAPI(void) cvReduce( const CvArr* src, CvArr* dst, int dim CV_DEFAULT(-1), + int op CV_DEFAULT(CV_REDUCE_SUM) ); + +/****************************************************************************************\ +* Discrete Linear Transforms and Related Functions * +\****************************************************************************************/ + +/** @anchor core_c_DftFlags + @name Flags for cvDFT, cvDCT and cvMulSpectrums + @{ + */ +#define CV_DXT_FORWARD 0 +#define CV_DXT_INVERSE 1 +#define CV_DXT_SCALE 2 /**< divide result by size of array */ +#define CV_DXT_INV_SCALE (CV_DXT_INVERSE + CV_DXT_SCALE) +#define CV_DXT_INVERSE_SCALE CV_DXT_INV_SCALE +#define CV_DXT_ROWS 4 /**< transform each row individually */ +#define CV_DXT_MUL_CONJ 8 /**< conjugate the second argument of cvMulSpectrums */ +/** @} */ + +/** Discrete Fourier Transform: + complex->complex, + real->ccs (forward), + ccs->real (inverse) +@see core_c_DftFlags "flags" +*/ +CVAPI(void) cvDFT( const CvArr* src, CvArr* dst, int flags, + int nonzero_rows CV_DEFAULT(0) ); +#define cvFFT cvDFT + +/** Multiply results of DFTs: DFT(X)*DFT(Y) or DFT(X)*conj(DFT(Y)) +@see core_c_DftFlags "flags" +*/ +CVAPI(void) cvMulSpectrums( const CvArr* src1, const CvArr* src2, + CvArr* dst, int flags ); + +/** Finds optimal DFT vector size >= size0 */ +CVAPI(int) cvGetOptimalDFTSize( int size0 ); + +/** Discrete Cosine Transform +@see core_c_DftFlags "flags" +*/ +CVAPI(void) cvDCT( const CvArr* src, CvArr* dst, int flags ); + +/****************************************************************************************\ +* Dynamic data structures * +\****************************************************************************************/ + +/** Calculates length of sequence slice (with support of negative indices). */ +CVAPI(int) cvSliceLength( CvSlice slice, const CvSeq* seq ); + + +/** Creates new memory storage. + block_size == 0 means that default, + somewhat optimal size, is used (currently, it is 64K) */ +CVAPI(CvMemStorage*) cvCreateMemStorage( int block_size CV_DEFAULT(0)); + + +/** Creates a memory storage that will borrow memory blocks from parent storage */ +CVAPI(CvMemStorage*) cvCreateChildMemStorage( CvMemStorage* parent ); + + +/** Releases memory storage. All the children of a parent must be released before + the parent. A child storage returns all the blocks to parent when it is released */ +CVAPI(void) cvReleaseMemStorage( CvMemStorage** storage ); + + +/** Clears memory storage. This is the only way(!!!) (besides cvRestoreMemStoragePos) + to reuse memory allocated for the storage - cvClearSeq,cvClearSet ... + do not free any memory. + A child storage returns all the blocks to the parent when it is cleared */ +CVAPI(void) cvClearMemStorage( CvMemStorage* storage ); + +/** Remember a storage "free memory" position */ +CVAPI(void) cvSaveMemStoragePos( const CvMemStorage* storage, CvMemStoragePos* pos ); + +/** Restore a storage "free memory" position */ +CVAPI(void) cvRestoreMemStoragePos( CvMemStorage* storage, CvMemStoragePos* pos ); + +/** Allocates continuous buffer of the specified size in the storage */ +CVAPI(void*) cvMemStorageAlloc( CvMemStorage* storage, size_t size ); + +/** Allocates string in memory storage */ +//CVAPI(CvString) cvMemStorageAllocString( CvMemStorage* storage, const char* ptr, +// int len CV_DEFAULT(-1) ); + +/** Creates new empty sequence that will reside in the specified storage */ +CVAPI(CvSeq*) cvCreateSeq( int seq_flags, size_t header_size, + size_t elem_size, CvMemStorage* storage ); + +/** Changes default size (granularity) of sequence blocks. + The default size is ~1Kbyte */ +CVAPI(void) cvSetSeqBlockSize( CvSeq* seq, int delta_elems ); + + +/** Adds new element to the end of sequence. Returns pointer to the element */ +CVAPI(schar*) cvSeqPush( CvSeq* seq, const void* element CV_DEFAULT(NULL)); + + +/** Adds new element to the beginning of sequence. Returns pointer to it */ +CVAPI(schar*) cvSeqPushFront( CvSeq* seq, const void* element CV_DEFAULT(NULL)); + + +/** Removes the last element from sequence and optionally saves it */ +CVAPI(void) cvSeqPop( CvSeq* seq, void* element CV_DEFAULT(NULL)); + + +/** Removes the first element from sequence and optioanally saves it */ +CVAPI(void) cvSeqPopFront( CvSeq* seq, void* element CV_DEFAULT(NULL)); + + +#define CV_FRONT 1 +#define CV_BACK 0 +/** Adds several new elements to the end of sequence */ +CVAPI(void) cvSeqPushMulti( CvSeq* seq, const void* elements, + int count, int in_front CV_DEFAULT(0) ); + +/** Removes several elements from the end of sequence and optionally saves them */ +CVAPI(void) cvSeqPopMulti( CvSeq* seq, void* elements, + int count, int in_front CV_DEFAULT(0) ); + +/** Inserts a new element in the middle of sequence. + cvSeqInsert(seq,0,elem) == cvSeqPushFront(seq,elem) */ +CVAPI(schar*) cvSeqInsert( CvSeq* seq, int before_index, + const void* element CV_DEFAULT(NULL)); + +/** Removes specified sequence element */ +CVAPI(void) cvSeqRemove( CvSeq* seq, int index ); + + +/** Removes all the elements from the sequence. The freed memory + can be reused later only by the same sequence unless cvClearMemStorage + or cvRestoreMemStoragePos is called */ +CVAPI(void) cvClearSeq( CvSeq* seq ); + + +/** Retrieves pointer to specified sequence element. + Negative indices are supported and mean counting from the end + (e.g -1 means the last sequence element) */ +CVAPI(schar*) cvGetSeqElem( const CvSeq* seq, int index ); + +/** Calculates index of the specified sequence element. + Returns -1 if element does not belong to the sequence */ +CVAPI(int) cvSeqElemIdx( const CvSeq* seq, const void* element, + CvSeqBlock** block CV_DEFAULT(NULL) ); + +/** Initializes sequence writer. The new elements will be added to the end of sequence */ +CVAPI(void) cvStartAppendToSeq( CvSeq* seq, CvSeqWriter* writer ); + + +/** Combination of cvCreateSeq and cvStartAppendToSeq */ +CVAPI(void) cvStartWriteSeq( int seq_flags, int header_size, + int elem_size, CvMemStorage* storage, + CvSeqWriter* writer ); + +/** Closes sequence writer, updates sequence header and returns pointer + to the resultant sequence + (which may be useful if the sequence was created using cvStartWriteSeq)) +*/ +CVAPI(CvSeq*) cvEndWriteSeq( CvSeqWriter* writer ); + + +/** Updates sequence header. May be useful to get access to some of previously + written elements via cvGetSeqElem or sequence reader */ +CVAPI(void) cvFlushSeqWriter( CvSeqWriter* writer ); + + +/** Initializes sequence reader. + The sequence can be read in forward or backward direction */ +CVAPI(void) cvStartReadSeq( const CvSeq* seq, CvSeqReader* reader, + int reverse CV_DEFAULT(0) ); + + +/** Returns current sequence reader position (currently observed sequence element) */ +CVAPI(int) cvGetSeqReaderPos( CvSeqReader* reader ); + + +/** Changes sequence reader position. It may seek to an absolute or + to relative to the current position */ +CVAPI(void) cvSetSeqReaderPos( CvSeqReader* reader, int index, + int is_relative CV_DEFAULT(0)); + +/** Copies sequence content to a continuous piece of memory */ +CVAPI(void*) cvCvtSeqToArray( const CvSeq* seq, void* elements, + CvSlice slice CV_DEFAULT(CV_WHOLE_SEQ) ); + +/** Creates sequence header for array. + After that all the operations on sequences that do not alter the content + can be applied to the resultant sequence */ +CVAPI(CvSeq*) cvMakeSeqHeaderForArray( int seq_type, int header_size, + int elem_size, void* elements, int total, + CvSeq* seq, CvSeqBlock* block ); + +/** Extracts sequence slice (with or without copying sequence elements) */ +CVAPI(CvSeq*) cvSeqSlice( const CvSeq* seq, CvSlice slice, + CvMemStorage* storage CV_DEFAULT(NULL), + int copy_data CV_DEFAULT(0)); + +CV_INLINE CvSeq* cvCloneSeq( const CvSeq* seq, CvMemStorage* storage CV_DEFAULT(NULL)) +{ + return cvSeqSlice( seq, CV_WHOLE_SEQ, storage, 1 ); +} + +/** Removes sequence slice */ +CVAPI(void) cvSeqRemoveSlice( CvSeq* seq, CvSlice slice ); + +/** Inserts a sequence or array into another sequence */ +CVAPI(void) cvSeqInsertSlice( CvSeq* seq, int before_index, const CvArr* from_arr ); + +/** a < b ? -1 : a > b ? 1 : 0 */ +typedef int (CV_CDECL* CvCmpFunc)(const void* a, const void* b, void* userdata ); + +/** Sorts sequence in-place given element comparison function */ +CVAPI(void) cvSeqSort( CvSeq* seq, CvCmpFunc func, void* userdata CV_DEFAULT(NULL) ); + +/** Finds element in a [sorted] sequence */ +CVAPI(schar*) cvSeqSearch( CvSeq* seq, const void* elem, CvCmpFunc func, + int is_sorted, int* elem_idx, + void* userdata CV_DEFAULT(NULL) ); + +/** Reverses order of sequence elements in-place */ +CVAPI(void) cvSeqInvert( CvSeq* seq ); + +/** Splits sequence into one or more equivalence classes using the specified criteria */ +CVAPI(int) cvSeqPartition( const CvSeq* seq, CvMemStorage* storage, + CvSeq** labels, CvCmpFunc is_equal, void* userdata ); + +/************ Internal sequence functions ************/ +CVAPI(void) cvChangeSeqBlock( void* reader, int direction ); +CVAPI(void) cvCreateSeqBlock( CvSeqWriter* writer ); + + +/** Creates a new set */ +CVAPI(CvSet*) cvCreateSet( int set_flags, int header_size, + int elem_size, CvMemStorage* storage ); + +/** Adds new element to the set and returns pointer to it */ +CVAPI(int) cvSetAdd( CvSet* set_header, CvSetElem* elem CV_DEFAULT(NULL), + CvSetElem** inserted_elem CV_DEFAULT(NULL) ); + +/** Fast variant of cvSetAdd */ +CV_INLINE CvSetElem* cvSetNew( CvSet* set_header ) +{ + CvSetElem* elem = set_header->free_elems; + if( elem ) + { + set_header->free_elems = elem->next_free; + elem->flags = elem->flags & CV_SET_ELEM_IDX_MASK; + set_header->active_count++; + } + else + cvSetAdd( set_header, NULL, &elem ); + return elem; +} + +/** Removes set element given its pointer */ +CV_INLINE void cvSetRemoveByPtr( CvSet* set_header, void* elem ) +{ + CvSetElem* _elem = (CvSetElem*)elem; + assert( _elem->flags >= 0 /*&& (elem->flags & CV_SET_ELEM_IDX_MASK) < set_header->total*/ ); + _elem->next_free = set_header->free_elems; + _elem->flags = (_elem->flags & CV_SET_ELEM_IDX_MASK) | CV_SET_ELEM_FREE_FLAG; + set_header->free_elems = _elem; + set_header->active_count--; +} + +/** Removes element from the set by its index */ +CVAPI(void) cvSetRemove( CvSet* set_header, int index ); + +/** Returns a set element by index. If the element doesn't belong to the set, + NULL is returned */ +CV_INLINE CvSetElem* cvGetSetElem( const CvSet* set_header, int idx ) +{ + CvSetElem* elem = (CvSetElem*)(void *)cvGetSeqElem( (CvSeq*)set_header, idx ); + return elem && CV_IS_SET_ELEM( elem ) ? elem : 0; +} + +/** Removes all the elements from the set */ +CVAPI(void) cvClearSet( CvSet* set_header ); + +/** Creates new graph */ +CVAPI(CvGraph*) cvCreateGraph( int graph_flags, int header_size, + int vtx_size, int edge_size, + CvMemStorage* storage ); + +/** Adds new vertex to the graph */ +CVAPI(int) cvGraphAddVtx( CvGraph* graph, const CvGraphVtx* vtx CV_DEFAULT(NULL), + CvGraphVtx** inserted_vtx CV_DEFAULT(NULL) ); + + +/** Removes vertex from the graph together with all incident edges */ +CVAPI(int) cvGraphRemoveVtx( CvGraph* graph, int index ); +CVAPI(int) cvGraphRemoveVtxByPtr( CvGraph* graph, CvGraphVtx* vtx ); + + +/** Link two vertices specified by indices or pointers if they + are not connected or return pointer to already existing edge + connecting the vertices. + Functions return 1 if a new edge was created, 0 otherwise */ +CVAPI(int) cvGraphAddEdge( CvGraph* graph, + int start_idx, int end_idx, + const CvGraphEdge* edge CV_DEFAULT(NULL), + CvGraphEdge** inserted_edge CV_DEFAULT(NULL) ); + +CVAPI(int) cvGraphAddEdgeByPtr( CvGraph* graph, + CvGraphVtx* start_vtx, CvGraphVtx* end_vtx, + const CvGraphEdge* edge CV_DEFAULT(NULL), + CvGraphEdge** inserted_edge CV_DEFAULT(NULL) ); + +/** Remove edge connecting two vertices */ +CVAPI(void) cvGraphRemoveEdge( CvGraph* graph, int start_idx, int end_idx ); +CVAPI(void) cvGraphRemoveEdgeByPtr( CvGraph* graph, CvGraphVtx* start_vtx, + CvGraphVtx* end_vtx ); + +/** Find edge connecting two vertices */ +CVAPI(CvGraphEdge*) cvFindGraphEdge( const CvGraph* graph, int start_idx, int end_idx ); +CVAPI(CvGraphEdge*) cvFindGraphEdgeByPtr( const CvGraph* graph, + const CvGraphVtx* start_vtx, + const CvGraphVtx* end_vtx ); +#define cvGraphFindEdge cvFindGraphEdge +#define cvGraphFindEdgeByPtr cvFindGraphEdgeByPtr + +/** Remove all vertices and edges from the graph */ +CVAPI(void) cvClearGraph( CvGraph* graph ); + + +/** Count number of edges incident to the vertex */ +CVAPI(int) cvGraphVtxDegree( const CvGraph* graph, int vtx_idx ); +CVAPI(int) cvGraphVtxDegreeByPtr( const CvGraph* graph, const CvGraphVtx* vtx ); + + +/** Retrieves graph vertex by given index */ +#define cvGetGraphVtx( graph, idx ) (CvGraphVtx*)cvGetSetElem((CvSet*)(graph), (idx)) + +/** Retrieves index of a graph vertex given its pointer */ +#define cvGraphVtxIdx( graph, vtx ) ((vtx)->flags & CV_SET_ELEM_IDX_MASK) + +/** Retrieves index of a graph edge given its pointer */ +#define cvGraphEdgeIdx( graph, edge ) ((edge)->flags & CV_SET_ELEM_IDX_MASK) + +#define cvGraphGetVtxCount( graph ) ((graph)->active_count) +#define cvGraphGetEdgeCount( graph ) ((graph)->edges->active_count) + +#define CV_GRAPH_VERTEX 1 +#define CV_GRAPH_TREE_EDGE 2 +#define CV_GRAPH_BACK_EDGE 4 +#define CV_GRAPH_FORWARD_EDGE 8 +#define CV_GRAPH_CROSS_EDGE 16 +#define CV_GRAPH_ANY_EDGE 30 +#define CV_GRAPH_NEW_TREE 32 +#define CV_GRAPH_BACKTRACKING 64 +#define CV_GRAPH_OVER -1 + +#define CV_GRAPH_ALL_ITEMS -1 + +/** flags for graph vertices and edges */ +#define CV_GRAPH_ITEM_VISITED_FLAG (1 << 30) +#define CV_IS_GRAPH_VERTEX_VISITED(vtx) \ + (((CvGraphVtx*)(vtx))->flags & CV_GRAPH_ITEM_VISITED_FLAG) +#define CV_IS_GRAPH_EDGE_VISITED(edge) \ + (((CvGraphEdge*)(edge))->flags & CV_GRAPH_ITEM_VISITED_FLAG) +#define CV_GRAPH_SEARCH_TREE_NODE_FLAG (1 << 29) +#define CV_GRAPH_FORWARD_EDGE_FLAG (1 << 28) + +typedef struct CvGraphScanner +{ + CvGraphVtx* vtx; /* current graph vertex (or current edge origin) */ + CvGraphVtx* dst; /* current graph edge destination vertex */ + CvGraphEdge* edge; /* current edge */ + + CvGraph* graph; /* the graph */ + CvSeq* stack; /* the graph vertex stack */ + int index; /* the lower bound of certainly visited vertices */ + int mask; /* event mask */ +} +CvGraphScanner; + +/** Creates new graph scanner. */ +CVAPI(CvGraphScanner*) cvCreateGraphScanner( CvGraph* graph, + CvGraphVtx* vtx CV_DEFAULT(NULL), + int mask CV_DEFAULT(CV_GRAPH_ALL_ITEMS)); + +/** Releases graph scanner. */ +CVAPI(void) cvReleaseGraphScanner( CvGraphScanner** scanner ); + +/** Get next graph element */ +CVAPI(int) cvNextGraphItem( CvGraphScanner* scanner ); + +/** Creates a copy of graph */ +CVAPI(CvGraph*) cvCloneGraph( const CvGraph* graph, CvMemStorage* storage ); + + +/** Does look-up transformation. Elements of the source array + (that should be 8uC1 or 8sC1) are used as indexes in lutarr 256-element table */ +CVAPI(void) cvLUT( const CvArr* src, CvArr* dst, const CvArr* lut ); + + +/******************* Iteration through the sequence tree *****************/ +typedef struct CvTreeNodeIterator +{ + const void* node; + int level; + int max_level; +} +CvTreeNodeIterator; + +CVAPI(void) cvInitTreeNodeIterator( CvTreeNodeIterator* tree_iterator, + const void* first, int max_level ); +CVAPI(void*) cvNextTreeNode( CvTreeNodeIterator* tree_iterator ); +CVAPI(void*) cvPrevTreeNode( CvTreeNodeIterator* tree_iterator ); + +/** Inserts sequence into tree with specified "parent" sequence. + If parent is equal to frame (e.g. the most external contour), + then added contour will have null pointer to parent. */ +CVAPI(void) cvInsertNodeIntoTree( void* node, void* parent, void* frame ); + +/** Removes contour from tree (together with the contour children). */ +CVAPI(void) cvRemoveNodeFromTree( void* node, void* frame ); + +/** Gathers pointers to all the sequences, + accessible from the `first`, to the single sequence */ +CVAPI(CvSeq*) cvTreeToNodeSeq( const void* first, int header_size, + CvMemStorage* storage ); + +/** The function implements the K-means algorithm for clustering an array of sample + vectors in a specified number of classes */ +#define CV_KMEANS_USE_INITIAL_LABELS 1 +CVAPI(int) cvKMeans2( const CvArr* samples, int cluster_count, CvArr* labels, + CvTermCriteria termcrit, int attempts CV_DEFAULT(1), + CvRNG* rng CV_DEFAULT(0), int flags CV_DEFAULT(0), + CvArr* _centers CV_DEFAULT(0), double* compactness CV_DEFAULT(0) ); + +/****************************************************************************************\ +* System functions * +\****************************************************************************************/ + +/** Loads optimized functions from IPP, MKL etc. or switches back to pure C code */ +CVAPI(int) cvUseOptimized( int on_off ); + +typedef IplImage* (CV_STDCALL* Cv_iplCreateImageHeader) + (int,int,int,char*,char*,int,int,int,int,int, + IplROI*,IplImage*,void*,IplTileInfo*); +typedef void (CV_STDCALL* Cv_iplAllocateImageData)(IplImage*,int,int); +typedef void (CV_STDCALL* Cv_iplDeallocate)(IplImage*,int); +typedef IplROI* (CV_STDCALL* Cv_iplCreateROI)(int,int,int,int,int); +typedef IplImage* (CV_STDCALL* Cv_iplCloneImage)(const IplImage*); + +/** @brief Makes OpenCV use IPL functions for allocating IplImage and IplROI structures. + +Normally, the function is not called directly. Instead, a simple macro +CV_TURN_ON_IPL_COMPATIBILITY() is used that calls cvSetIPLAllocators and passes there pointers +to IPL allocation functions. : +@code + ... + CV_TURN_ON_IPL_COMPATIBILITY() + ... +@endcode +@param create_header pointer to a function, creating IPL image header. +@param allocate_data pointer to a function, allocating IPL image data. +@param deallocate pointer to a function, deallocating IPL image. +@param create_roi pointer to a function, creating IPL image ROI (i.e. Region of Interest). +@param clone_image pointer to a function, cloning an IPL image. + */ +CVAPI(void) cvSetIPLAllocators( Cv_iplCreateImageHeader create_header, + Cv_iplAllocateImageData allocate_data, + Cv_iplDeallocate deallocate, + Cv_iplCreateROI create_roi, + Cv_iplCloneImage clone_image ); + +#define CV_TURN_ON_IPL_COMPATIBILITY() \ + cvSetIPLAllocators( iplCreateImageHeader, iplAllocateImage, \ + iplDeallocate, iplCreateROI, iplCloneImage ) + +/****************************************************************************************\ +* Data Persistence * +\****************************************************************************************/ + +#if 0 +/********************************** High-level functions ********************************/ + +/** @brief Opens file storage for reading or writing data. + +The function opens file storage for reading or writing data. In the latter case, a new file is +created or an existing file is rewritten. The type of the read or written file is determined by the +filename extension: .xml for XML, .yml or .yaml for YAML and .json for JSON. + +At the same time, it also supports adding parameters like "example.xml?base64". + +The function returns a pointer to the CvFileStorage structure. +If the file cannot be opened then the function returns NULL. +@param filename Name of the file associated with the storage +@param memstorage Memory storage used for temporary data and for +: storing dynamic structures, such as CvSeq or CvGraph . If it is NULL, a temporary memory + storage is created and used. +@param flags Can be one of the following: +> - **CV_STORAGE_READ** the storage is open for reading +> - **CV_STORAGE_WRITE** the storage is open for writing + (use **CV_STORAGE_WRITE | CV_STORAGE_WRITE_BASE64** to write rawdata in Base64) +@param encoding + */ +CVAPI(CvFileStorage*) cvOpenFileStorage( const char* filename, CvMemStorage* memstorage, + int flags, const char* encoding CV_DEFAULT(NULL) ); + +/** @brief Releases file storage. + +The function closes the file associated with the storage and releases all the temporary structures. +It must be called after all I/O operations with the storage are finished. +@param fs Double pointer to the released file storage + */ +CVAPI(void) cvReleaseFileStorage( CvFileStorage** fs ); + +/** returns attribute value or 0 (NULL) if there is no such attribute */ +CVAPI(const char*) cvAttrValue( const CvAttrList* attr, const char* attr_name ); + +/** @brief Starts writing a new structure. + +The function starts writing a compound structure (collection) that can be a sequence or a map. After +all the structure fields, which can be scalars or structures, are written, cvEndWriteStruct should +be called. The function can be used to group some objects or to implement the write function for a +some user object (see CvTypeInfo). +@param fs File storage +@param name Name of the written structure. The structure can be accessed by this name when the +storage is read. +@param struct_flags A combination one of the following values: +- **CV_NODE_SEQ** the written structure is a sequence (see discussion of CvFileStorage ), + that is, its elements do not have a name. +- **CV_NODE_MAP** the written structure is a map (see discussion of CvFileStorage ), that + is, all its elements have names. +One and only one of the two above flags must be specified +- **CV_NODE_FLOW** the optional flag that makes sense only for YAML streams. It means that + the structure is written as a flow (not as a block), which is more compact. It is + recommended to use this flag for structures or arrays whose elements are all scalars. +@param type_name Optional parameter - the object type name. In + case of XML it is written as a type_id attribute of the structure opening tag. In the case of + YAML it is written after a colon following the structure name (see the example in + CvFileStorage description). In case of JSON it is written as a name/value pair. + Mainly it is used with user objects. When the storage is read, the + encoded type name is used to determine the object type (see CvTypeInfo and cvFindType ). +@param attributes This parameter is not used in the current implementation + */ +CVAPI(void) cvStartWriteStruct( CvFileStorage* fs, const char* name, + int struct_flags, const char* type_name CV_DEFAULT(NULL), + CvAttrList attributes CV_DEFAULT(cvAttrList())); + +/** @brief Finishes writing to a file node collection. +@param fs File storage +@sa cvStartWriteStruct. + */ +CVAPI(void) cvEndWriteStruct( CvFileStorage* fs ); + +/** @brief Writes an integer value. + +The function writes a single integer value (with or without a name) to the file storage. +@param fs File storage +@param name Name of the written value. Should be NULL if and only if the parent structure is a +sequence. +@param value The written value + */ +CVAPI(void) cvWriteInt( CvFileStorage* fs, const char* name, int value ); + +/** @brief Writes a floating-point value. + +The function writes a single floating-point value (with or without a name) to file storage. Special +values are encoded as follows: NaN (Not A Number) as .NaN, infinity as +.Inf or -.Inf. + +The following example shows how to use the low-level writing functions to store custom structures, +such as termination criteria, without registering a new type. : +@code + void write_termcriteria( CvFileStorage* fs, const char* struct_name, + CvTermCriteria* termcrit ) + { + cvStartWriteStruct( fs, struct_name, CV_NODE_MAP, NULL, cvAttrList(0,0)); + cvWriteComment( fs, "termination criteria", 1 ); // just a description + if( termcrit->type & CV_TERMCRIT_ITER ) + cvWriteInteger( fs, "max_iterations", termcrit->max_iter ); + if( termcrit->type & CV_TERMCRIT_EPS ) + cvWriteReal( fs, "accuracy", termcrit->epsilon ); + cvEndWriteStruct( fs ); + } +@endcode +@param fs File storage +@param name Name of the written value. Should be NULL if and only if the parent structure is a +sequence. +@param value The written value +*/ +CVAPI(void) cvWriteReal( CvFileStorage* fs, const char* name, double value ); + +/** @brief Writes a text string. + +The function writes a text string to file storage. +@param fs File storage +@param name Name of the written string . Should be NULL if and only if the parent structure is a +sequence. +@param str The written text string +@param quote If non-zero, the written string is put in quotes, regardless of whether they are +required. Otherwise, if the flag is zero, quotes are used only when they are required (e.g. when +the string starts with a digit or contains spaces). + */ +CVAPI(void) cvWriteString( CvFileStorage* fs, const char* name, + const char* str, int quote CV_DEFAULT(0) ); + +/** @brief Writes a comment. + +The function writes a comment into file storage. The comments are skipped when the storage is read. +@param fs File storage +@param comment The written comment, single-line or multi-line +@param eol_comment If non-zero, the function tries to put the comment at the end of current line. +If the flag is zero, if the comment is multi-line, or if it does not fit at the end of the current +line, the comment starts a new line. + */ +CVAPI(void) cvWriteComment( CvFileStorage* fs, const char* comment, + int eol_comment ); + +/** @brief Writes an object to file storage. + +The function writes an object to file storage. First, the appropriate type info is found using +cvTypeOf. Then, the write method associated with the type info is called. + +Attributes are used to customize the writing procedure. The standard types support the following +attributes (all the dt attributes have the same format as in cvWriteRawData): + +-# CvSeq + - **header_dt** description of user fields of the sequence header that follow CvSeq, or + CvChain (if the sequence is a Freeman chain) or CvContour (if the sequence is a contour or + point sequence) + - **dt** description of the sequence elements. + - **recursive** if the attribute is present and is not equal to "0" or "false", the whole + tree of sequences (contours) is stored. +-# CvGraph + - **header_dt** description of user fields of the graph header that follows CvGraph; + - **vertex_dt** description of user fields of graph vertices + - **edge_dt** description of user fields of graph edges (note that the edge weight is + always written, so there is no need to specify it explicitly) + +Below is the code that creates the YAML file shown in the CvFileStorage description: +@code + #include "cxcore.h" + + int main( int argc, char** argv ) + { + CvMat* mat = cvCreateMat( 3, 3, CV_32F ); + CvFileStorage* fs = cvOpenFileStorage( "example.yml", 0, CV_STORAGE_WRITE ); + + cvSetIdentity( mat ); + cvWrite( fs, "A", mat, cvAttrList(0,0) ); + + cvReleaseFileStorage( &fs ); + cvReleaseMat( &mat ); + return 0; + } +@endcode +@param fs File storage +@param name Name of the written object. Should be NULL if and only if the parent structure is a +sequence. +@param ptr Pointer to the object +@param attributes The attributes of the object. They are specific for each particular type (see +the discussion below). + */ +CVAPI(void) cvWrite( CvFileStorage* fs, const char* name, const void* ptr, + CvAttrList attributes CV_DEFAULT(cvAttrList())); + +/** @brief Starts the next stream. + +The function finishes the currently written stream and starts the next stream. In the case of XML +the file with multiple streams looks like this: +@code{.xml} + + + + + + + ... +@endcode +The YAML file will look like this: +@code{.yaml} + %YAML 1.0 + # stream #1 data + ... + --- + # stream #2 data +@endcode +This is useful for concatenating files or for resuming the writing process. +@param fs File storage + */ +CVAPI(void) cvStartNextStream( CvFileStorage* fs ); + +/** @brief Writes multiple numbers. + +The function writes an array, whose elements consist of single or multiple numbers. The function +call can be replaced with a loop containing a few cvWriteInt and cvWriteReal calls, but a single +call is more efficient. Note that because none of the elements have a name, they should be written +to a sequence rather than a map. +@param fs File storage +@param src Pointer to the written array +@param len Number of the array elements to write +@param dt Specification of each array element, see @ref format_spec "format specification" + */ +CVAPI(void) cvWriteRawData( CvFileStorage* fs, const void* src, + int len, const char* dt ); + +/** @brief Writes multiple numbers in Base64. + +If either CV_STORAGE_WRITE_BASE64 or cv::FileStorage::WRITE_BASE64 is used, +this function will be the same as cvWriteRawData. If neither, the main +difference is that it outputs a sequence in Base64 encoding rather than +in plain text. + +This function can only be used to write a sequence with a type "binary". + +@param fs File storage +@param src Pointer to the written array +@param len Number of the array elements to write +@param dt Specification of each array element, see @ref format_spec "format specification" +*/ +CVAPI(void) cvWriteRawDataBase64( CvFileStorage* fs, const void* src, + int len, const char* dt ); + +/** @brief Returns a unique pointer for a given name. + +The function returns a unique pointer for each particular file node name. This pointer can be then +passed to the cvGetFileNode function that is faster than cvGetFileNodeByName because it compares +text strings by comparing pointers rather than the strings' content. + +Consider the following example where an array of points is encoded as a sequence of 2-entry maps: +@code + points: + - { x: 10, y: 10 } + - { x: 20, y: 20 } + - { x: 30, y: 30 } + # ... +@endcode +Then, it is possible to get hashed "x" and "y" pointers to speed up decoding of the points. : +@code + #include "cxcore.h" + + int main( int argc, char** argv ) + { + CvFileStorage* fs = cvOpenFileStorage( "points.yml", 0, CV_STORAGE_READ ); + CvStringHashNode* x_key = cvGetHashedNode( fs, "x", -1, 1 ); + CvStringHashNode* y_key = cvGetHashedNode( fs, "y", -1, 1 ); + CvFileNode* points = cvGetFileNodeByName( fs, 0, "points" ); + + if( CV_NODE_IS_SEQ(points->tag) ) + { + CvSeq* seq = points->data.seq; + int i, total = seq->total; + CvSeqReader reader; + cvStartReadSeq( seq, &reader, 0 ); + for( i = 0; i < total; i++ ) + { + CvFileNode* pt = (CvFileNode*)reader.ptr; + #if 1 // faster variant + CvFileNode* xnode = cvGetFileNode( fs, pt, x_key, 0 ); + CvFileNode* ynode = cvGetFileNode( fs, pt, y_key, 0 ); + assert( xnode && CV_NODE_IS_INT(xnode->tag) && + ynode && CV_NODE_IS_INT(ynode->tag)); + int x = xnode->data.i; // or x = cvReadInt( xnode, 0 ); + int y = ynode->data.i; // or y = cvReadInt( ynode, 0 ); + #elif 1 // slower variant; does not use x_key & y_key + CvFileNode* xnode = cvGetFileNodeByName( fs, pt, "x" ); + CvFileNode* ynode = cvGetFileNodeByName( fs, pt, "y" ); + assert( xnode && CV_NODE_IS_INT(xnode->tag) && + ynode && CV_NODE_IS_INT(ynode->tag)); + int x = xnode->data.i; // or x = cvReadInt( xnode, 0 ); + int y = ynode->data.i; // or y = cvReadInt( ynode, 0 ); + #else // the slowest yet the easiest to use variant + int x = cvReadIntByName( fs, pt, "x", 0 ); + int y = cvReadIntByName( fs, pt, "y", 0 ); + #endif + CV_NEXT_SEQ_ELEM( seq->elem_size, reader ); + printf(" + } + } + cvReleaseFileStorage( &fs ); + return 0; + } +@endcode +Please note that whatever method of accessing a map you are using, it is still much slower than +using plain sequences; for example, in the above example, it is more efficient to encode the points +as pairs of integers in a single numeric sequence. +@param fs File storage +@param name Literal node name +@param len Length of the name (if it is known apriori), or -1 if it needs to be calculated +@param create_missing Flag that specifies, whether an absent key should be added into the hash table +*/ +CVAPI(CvStringHashNode*) cvGetHashedKey( CvFileStorage* fs, const char* name, + int len CV_DEFAULT(-1), + int create_missing CV_DEFAULT(0)); + +/** @brief Retrieves one of the top-level nodes of the file storage. + +The function returns one of the top-level file nodes. The top-level nodes do not have a name, they +correspond to the streams that are stored one after another in the file storage. If the index is out +of range, the function returns a NULL pointer, so all the top-level nodes can be iterated by +subsequent calls to the function with stream_index=0,1,..., until the NULL pointer is returned. +This function can be used as a base for recursive traversal of the file storage. +@param fs File storage +@param stream_index Zero-based index of the stream. See cvStartNextStream . In most cases, +there is only one stream in the file; however, there can be several. + */ +CVAPI(CvFileNode*) cvGetRootFileNode( const CvFileStorage* fs, + int stream_index CV_DEFAULT(0) ); + +/** @brief Finds a node in a map or file storage. + +The function finds a file node. It is a faster version of cvGetFileNodeByName (see +cvGetHashedKey discussion). Also, the function can insert a new node, if it is not in the map yet. +@param fs File storage +@param map The parent map. If it is NULL, the function searches a top-level node. If both map and +key are NULLs, the function returns the root file node - a map that contains top-level nodes. +@param key Unique pointer to the node name, retrieved with cvGetHashedKey +@param create_missing Flag that specifies whether an absent node should be added to the map + */ +CVAPI(CvFileNode*) cvGetFileNode( CvFileStorage* fs, CvFileNode* map, + const CvStringHashNode* key, + int create_missing CV_DEFAULT(0) ); + +/** @brief Finds a node in a map or file storage. + +The function finds a file node by name. The node is searched either in map or, if the pointer is +NULL, among the top-level file storage nodes. Using this function for maps and cvGetSeqElem (or +sequence reader) for sequences, it is possible to navigate through the file storage. To speed up +multiple queries for a certain key (e.g., in the case of an array of structures) one may use a +combination of cvGetHashedKey and cvGetFileNode. +@param fs File storage +@param map The parent map. If it is NULL, the function searches in all the top-level nodes +(streams), starting with the first one. +@param name The file node name + */ +CVAPI(CvFileNode*) cvGetFileNodeByName( const CvFileStorage* fs, + const CvFileNode* map, + const char* name ); + +/** @brief Retrieves an integer value from a file node. + +The function returns an integer that is represented by the file node. If the file node is NULL, the +default_value is returned (thus, it is convenient to call the function right after cvGetFileNode +without checking for a NULL pointer). If the file node has type CV_NODE_INT, then node-\>data.i is +returned. If the file node has type CV_NODE_REAL, then node-\>data.f is converted to an integer +and returned. Otherwise the error is reported. +@param node File node +@param default_value The value that is returned if node is NULL + */ +CV_INLINE int cvReadInt( const CvFileNode* node, int default_value CV_DEFAULT(0) ) +{ + return !node ? default_value : + CV_NODE_IS_INT(node->tag) ? node->data.i : + CV_NODE_IS_REAL(node->tag) ? cvRound(node->data.f) : 0x7fffffff; +} + +/** @brief Finds a file node and returns its value. + +The function is a simple superposition of cvGetFileNodeByName and cvReadInt. +@param fs File storage +@param map The parent map. If it is NULL, the function searches a top-level node. +@param name The node name +@param default_value The value that is returned if the file node is not found + */ +CV_INLINE int cvReadIntByName( const CvFileStorage* fs, const CvFileNode* map, + const char* name, int default_value CV_DEFAULT(0) ) +{ + return cvReadInt( cvGetFileNodeByName( fs, map, name ), default_value ); +} + +/** @brief Retrieves a floating-point value from a file node. + +The function returns a floating-point value that is represented by the file node. If the file node +is NULL, the default_value is returned (thus, it is convenient to call the function right after +cvGetFileNode without checking for a NULL pointer). If the file node has type CV_NODE_REAL , +then node-\>data.f is returned. If the file node has type CV_NODE_INT , then node-:math:\>data.f +is converted to floating-point and returned. Otherwise the result is not determined. +@param node File node +@param default_value The value that is returned if node is NULL + */ +CV_INLINE double cvReadReal( const CvFileNode* node, double default_value CV_DEFAULT(0.) ) +{ + return !node ? default_value : + CV_NODE_IS_INT(node->tag) ? (double)node->data.i : + CV_NODE_IS_REAL(node->tag) ? node->data.f : 1e300; +} + +/** @brief Finds a file node and returns its value. + +The function is a simple superposition of cvGetFileNodeByName and cvReadReal . +@param fs File storage +@param map The parent map. If it is NULL, the function searches a top-level node. +@param name The node name +@param default_value The value that is returned if the file node is not found + */ +CV_INLINE double cvReadRealByName( const CvFileStorage* fs, const CvFileNode* map, + const char* name, double default_value CV_DEFAULT(0.) ) +{ + return cvReadReal( cvGetFileNodeByName( fs, map, name ), default_value ); +} + +/** @brief Retrieves a text string from a file node. + +The function returns a text string that is represented by the file node. If the file node is NULL, +the default_value is returned (thus, it is convenient to call the function right after +cvGetFileNode without checking for a NULL pointer). If the file node has type CV_NODE_STR , then +node-:math:\>data.str.ptr is returned. Otherwise the result is not determined. +@param node File node +@param default_value The value that is returned if node is NULL + */ +CV_INLINE const char* cvReadString( const CvFileNode* node, + const char* default_value CV_DEFAULT(NULL) ) +{ + return !node ? default_value : CV_NODE_IS_STRING(node->tag) ? node->data.str.ptr : 0; +} + +/** @brief Finds a file node by its name and returns its value. + +The function is a simple superposition of cvGetFileNodeByName and cvReadString . +@param fs File storage +@param map The parent map. If it is NULL, the function searches a top-level node. +@param name The node name +@param default_value The value that is returned if the file node is not found + */ +CV_INLINE const char* cvReadStringByName( const CvFileStorage* fs, const CvFileNode* map, + const char* name, const char* default_value CV_DEFAULT(NULL) ) +{ + return cvReadString( cvGetFileNodeByName( fs, map, name ), default_value ); +} + + +/** @brief Decodes an object and returns a pointer to it. + +The function decodes a user object (creates an object in a native representation from the file +storage subtree) and returns it. The object to be decoded must be an instance of a registered type +that supports the read method (see CvTypeInfo). The type of the object is determined by the type +name that is encoded in the file. If the object is a dynamic structure, it is created either in +memory storage and passed to cvOpenFileStorage or, if a NULL pointer was passed, in temporary +memory storage, which is released when cvReleaseFileStorage is called. Otherwise, if the object is +not a dynamic structure, it is created in a heap and should be released with a specialized function +or by using the generic cvRelease. +@param fs File storage +@param node The root object node +@param attributes Unused parameter + */ +CVAPI(void*) cvRead( CvFileStorage* fs, CvFileNode* node, + CvAttrList* attributes CV_DEFAULT(NULL)); + +/** @brief Finds an object by name and decodes it. + +The function is a simple superposition of cvGetFileNodeByName and cvRead. +@param fs File storage +@param map The parent map. If it is NULL, the function searches a top-level node. +@param name The node name +@param attributes Unused parameter + */ +CV_INLINE void* cvReadByName( CvFileStorage* fs, const CvFileNode* map, + const char* name, CvAttrList* attributes CV_DEFAULT(NULL) ) +{ + return cvRead( fs, cvGetFileNodeByName( fs, map, name ), attributes ); +} + + +/** @brief Initializes the file node sequence reader. + +The function initializes the sequence reader to read data from a file node. The initialized reader +can be then passed to cvReadRawDataSlice. +@param fs File storage +@param src The file node (a sequence) to read numbers from +@param reader Pointer to the sequence reader + */ +CVAPI(void) cvStartReadRawData( const CvFileStorage* fs, const CvFileNode* src, + CvSeqReader* reader ); + +/** @brief Initializes file node sequence reader. + +The function reads one or more elements from the file node, representing a sequence, to a +user-specified array. The total number of read sequence elements is a product of total and the +number of components in each array element. For example, if dt=2if, the function will read total\*3 +sequence elements. As with any sequence, some parts of the file node sequence can be skipped or read +repeatedly by repositioning the reader using cvSetSeqReaderPos. +@param fs File storage +@param reader The sequence reader. Initialize it with cvStartReadRawData . +@param count The number of elements to read +@param dst Pointer to the destination array +@param dt Specification of each array element. It has the same format as in cvWriteRawData . + */ +CVAPI(void) cvReadRawDataSlice( const CvFileStorage* fs, CvSeqReader* reader, + int count, void* dst, const char* dt ); + +/** @brief Reads multiple numbers. + +The function reads elements from a file node that represents a sequence of scalars. +@param fs File storage +@param src The file node (a sequence) to read numbers from +@param dst Pointer to the destination array +@param dt Specification of each array element. It has the same format as in cvWriteRawData . + */ +CVAPI(void) cvReadRawData( const CvFileStorage* fs, const CvFileNode* src, + void* dst, const char* dt ); + +/** @brief Writes a file node to another file storage. + +The function writes a copy of a file node to file storage. Possible applications of the function are +merging several file storages into one and conversion between XML, YAML and JSON formats. +@param fs Destination file storage +@param new_node_name New name of the file node in the destination file storage. To keep the +existing name, use cvcvGetFileNodeName +@param node The written node +@param embed If the written node is a collection and this parameter is not zero, no extra level of +hierarchy is created. Instead, all the elements of node are written into the currently written +structure. Of course, map elements can only be embedded into another map, and sequence elements +can only be embedded into another sequence. + */ +CVAPI(void) cvWriteFileNode( CvFileStorage* fs, const char* new_node_name, + const CvFileNode* node, int embed ); + +/** @brief Returns the name of a file node. + +The function returns the name of a file node or NULL, if the file node does not have a name or if +node is NULL. +@param node File node + */ +CVAPI(const char*) cvGetFileNodeName( const CvFileNode* node ); + +/*********************************** Adding own types ***********************************/ + +/** @brief Registers a new type. + +The function registers a new type, which is described by info . The function creates a copy of the +structure, so the user should delete it after calling the function. +@param info Type info structure + */ +CVAPI(void) cvRegisterType( const CvTypeInfo* info ); + +/** @brief Unregisters the type. + +The function unregisters a type with a specified name. If the name is unknown, it is possible to +locate the type info by an instance of the type using cvTypeOf or by iterating the type list, +starting from cvFirstType, and then calling cvUnregisterType(info-\>typeName). +@param type_name Name of an unregistered type + */ +CVAPI(void) cvUnregisterType( const char* type_name ); + +/** @brief Returns the beginning of a type list. + +The function returns the first type in the list of registered types. Navigation through the list can +be done via the prev and next fields of the CvTypeInfo structure. + */ +CVAPI(CvTypeInfo*) cvFirstType(void); + +/** @brief Finds a type by its name. + +The function finds a registered type by its name. It returns NULL if there is no type with the +specified name. +@param type_name Type name + */ +CVAPI(CvTypeInfo*) cvFindType( const char* type_name ); + +/** @brief Returns the type of an object. + +The function finds the type of a given object. It iterates through the list of registered types and +calls the is_instance function/method for every type info structure with that object until one of +them returns non-zero or until the whole list has been traversed. In the latter case, the function +returns NULL. +@param struct_ptr The object pointer + */ +CVAPI(CvTypeInfo*) cvTypeOf( const void* struct_ptr ); + +#endif + +/** @brief Releases an object. + + The function finds the type of a given object and calls release with the double pointer. + @param struct_ptr Double pointer to the object + */ +CVAPI(void) cvRelease( void** struct_ptr ); + +/** @brief Makes a clone of an object. + +The function finds the type of a given object and calls clone with the passed object. Of course, if +you know the object type, for example, struct_ptr is CvMat\*, it is faster to call the specific +function, like cvCloneMat. +@param struct_ptr The object to clone + */ +CVAPI(void*) cvClone( const void* struct_ptr ); + +/*********************************** Measuring Execution Time ***************************/ + +/** helper functions for RNG initialization and accurate time measurement: + uses internal clock counter on x86 */ +CVAPI(int64) cvGetTickCount( void ); +CVAPI(double) cvGetTickFrequency( void ); + +/*********************************** CPU capabilities ***********************************/ + +CVAPI(int) cvCheckHardwareSupport(int feature); + +/*********************************** Multi-Threading ************************************/ + +/** retrieve/set the number of threads used in OpenMP implementations */ +CVAPI(int) cvGetNumThreads( void ); +CVAPI(void) cvSetNumThreads( int threads CV_DEFAULT(0) ); +/** get index of the thread being executed */ +CVAPI(int) cvGetThreadNum( void ); + + +/********************************** Error Handling **************************************/ + +/** Get current OpenCV error status */ +CVAPI(int) cvGetErrStatus( void ); + +/** Sets error status silently */ +CVAPI(void) cvSetErrStatus( int status ); + +#define CV_ErrModeLeaf 0 /* Print error and exit program */ +#define CV_ErrModeParent 1 /* Print error and continue */ +#define CV_ErrModeSilent 2 /* Don't print and continue */ + +/** Retrieves current error processing mode */ +CVAPI(int) cvGetErrMode( void ); + +/** Sets error processing mode, returns previously used mode */ +CVAPI(int) cvSetErrMode( int mode ); + +/** Sets error status and performs some additional actions (displaying message box, + writing message to stderr, terminating application etc.) + depending on the current error mode */ +CVAPI(void) cvError( int status, const char* func_name, + const char* err_msg, const char* file_name, int line ); + +/** Retrieves textual description of the error given its code */ +CVAPI(const char*) cvErrorStr( int status ); + +/** Retrieves detailed information about the last error occurred */ +CVAPI(int) cvGetErrInfo( const char** errcode_desc, const char** description, + const char** filename, int* line ); + +/** Maps IPP error codes to the counterparts from OpenCV */ +CVAPI(int) cvErrorFromIppStatus( int ipp_status ); + +typedef int (CV_CDECL *CvErrorCallback)( int status, const char* func_name, + const char* err_msg, const char* file_name, int line, void* userdata ); + +/** Assigns a new error-handling function */ +CVAPI(CvErrorCallback) cvRedirectError( CvErrorCallback error_handler, + void* userdata CV_DEFAULT(NULL), + void** prev_userdata CV_DEFAULT(NULL) ); + +/** Output nothing */ +CVAPI(int) cvNulDevReport( int status, const char* func_name, const char* err_msg, + const char* file_name, int line, void* userdata ); + +/** Output to console(fprintf(stderr,...)) */ +CVAPI(int) cvStdErrReport( int status, const char* func_name, const char* err_msg, + const char* file_name, int line, void* userdata ); + +/** Output to MessageBox(WIN32) */ +CVAPI(int) cvGuiBoxReport( int status, const char* func_name, const char* err_msg, + const char* file_name, int line, void* userdata ); + +#define OPENCV_ERROR(status,func,context) \ +cvError((status),(func),(context),__FILE__,__LINE__) + +#define OPENCV_ASSERT(expr,func,context) \ +{if (! (expr)) \ +{OPENCV_ERROR(CV_StsInternal,(func),(context));}} + +#define OPENCV_CALL( Func ) \ +{ \ +Func; \ +} + + +/** CV_FUNCNAME macro defines icvFuncName constant which is used by CV_ERROR macro */ +#ifdef CV_NO_FUNC_NAMES +#define CV_FUNCNAME( Name ) +#define cvFuncName "" +#else +#define CV_FUNCNAME( Name ) \ +static char cvFuncName[] = Name +#endif + + +/** + CV_ERROR macro unconditionally raises error with passed code and message. + After raising error, control will be transferred to the exit label. + */ +#define CV_ERROR( Code, Msg ) \ +{ \ + cvError( (Code), cvFuncName, Msg, __FILE__, __LINE__ ); \ + __CV_EXIT__; \ +} + +/** + CV_CHECK macro checks error status after CV (or IPL) + function call. If error detected, control will be transferred to the exit + label. + */ +#define CV_CHECK() \ +{ \ + if( cvGetErrStatus() < 0 ) \ + CV_ERROR( CV_StsBackTrace, "Inner function failed." ); \ +} + + +/** + CV_CALL macro calls CV (or IPL) function, checks error status and + signals a error if the function failed. Useful in "parent node" + error processing mode + */ +#define CV_CALL( Func ) \ +{ \ + Func; \ + CV_CHECK(); \ +} + + +/** Runtime assertion macro */ +#define CV_ASSERT( Condition ) \ +{ \ + if( !(Condition) ) \ + CV_ERROR( CV_StsInternal, "Assertion: " #Condition " failed" ); \ +} + +#define __CV_BEGIN__ { +#define __CV_END__ goto exit; exit: ; } +#define __CV_EXIT__ goto exit + +/** @} core_c */ + +#ifdef __cplusplus +} // extern "C" +#endif + +#ifdef __cplusplus + +#include "opencv2/core/utility.hpp" + +namespace cv +{ + +//! @addtogroup core_c_glue +//! @{ + +/////////////////////////////////////////// glue /////////////////////////////////////////// + +//! converts array (CvMat or IplImage) to cv::Mat +CV_EXPORTS Mat cvarrToMat(const CvArr* arr, bool copyData=false, + bool allowND=true, int coiMode=0, + AutoBuffer* buf=0); + +static inline Mat cvarrToMatND(const CvArr* arr, bool copyData=false, int coiMode=0) +{ + return cvarrToMat(arr, copyData, true, coiMode); +} + + +//! extracts Channel of Interest from CvMat or IplImage and makes cv::Mat out of it. +CV_EXPORTS void extractImageCOI(const CvArr* arr, OutputArray coiimg, int coi=-1); +//! inserts single-channel cv::Mat into a multi-channel CvMat or IplImage +CV_EXPORTS void insertImageCOI(InputArray coiimg, CvArr* arr, int coi=-1); + + + +////// specialized implementations of DefaultDeleter::operator() for classic OpenCV types ////// + +template<> struct DefaultDeleter{ CV_EXPORTS void operator ()(CvMat* obj) const; }; +template<> struct DefaultDeleter{ CV_EXPORTS void operator ()(IplImage* obj) const; }; +template<> struct DefaultDeleter{ CV_EXPORTS void operator ()(CvMatND* obj) const; }; +template<> struct DefaultDeleter{ CV_EXPORTS void operator ()(CvSparseMat* obj) const; }; +template<> struct DefaultDeleter{ CV_EXPORTS void operator ()(CvMemStorage* obj) const; }; + +////////////// convenient wrappers for operating old-style dynamic structures ////////////// + +template class SeqIterator; + +typedef Ptr MemStorage; + +/*! + Template Sequence Class derived from CvSeq + + The class provides more convenient access to sequence elements, + STL-style operations and iterators. + + \note The class is targeted for simple data types, + i.e. no constructors or destructors + are called for the sequence elements. +*/ +template class Seq +{ +public: + typedef SeqIterator<_Tp> iterator; + typedef SeqIterator<_Tp> const_iterator; + + //! the default constructor + Seq(); + //! the constructor for wrapping CvSeq structure. The real element type in CvSeq should match _Tp. + Seq(const CvSeq* seq); + //! creates the empty sequence that resides in the specified storage + Seq(MemStorage& storage, int headerSize = sizeof(CvSeq)); + //! returns read-write reference to the specified element + _Tp& operator [](int idx); + //! returns read-only reference to the specified element + const _Tp& operator[](int idx) const; + //! returns iterator pointing to the beginning of the sequence + SeqIterator<_Tp> begin() const; + //! returns iterator pointing to the element following the last sequence element + SeqIterator<_Tp> end() const; + //! returns the number of elements in the sequence + size_t size() const; + //! returns the type of sequence elements (CV_8UC1 ... CV_64FC(CV_CN_MAX) ...) + int type() const; + //! returns the depth of sequence elements (CV_8U ... CV_64F) + int depth() const; + //! returns the number of channels in each sequence element + int channels() const; + //! returns the size of each sequence element + size_t elemSize() const; + //! returns index of the specified sequence element + size_t index(const _Tp& elem) const; + //! appends the specified element to the end of the sequence + void push_back(const _Tp& elem); + //! appends the specified element to the front of the sequence + void push_front(const _Tp& elem); + //! appends zero or more elements to the end of the sequence + void push_back(const _Tp* elems, size_t count); + //! appends zero or more elements to the front of the sequence + void push_front(const _Tp* elems, size_t count); + //! inserts the specified element to the specified position + void insert(int idx, const _Tp& elem); + //! inserts zero or more elements to the specified position + void insert(int idx, const _Tp* elems, size_t count); + //! removes element at the specified position + void remove(int idx); + //! removes the specified subsequence + void remove(const Range& r); + + //! returns reference to the first sequence element + _Tp& front(); + //! returns read-only reference to the first sequence element + const _Tp& front() const; + //! returns reference to the last sequence element + _Tp& back(); + //! returns read-only reference to the last sequence element + const _Tp& back() const; + //! returns true iff the sequence contains no elements + bool empty() const; + + //! removes all the elements from the sequence + void clear(); + //! removes the first element from the sequence + void pop_front(); + //! removes the last element from the sequence + void pop_back(); + //! removes zero or more elements from the beginning of the sequence + void pop_front(_Tp* elems, size_t count); + //! removes zero or more elements from the end of the sequence + void pop_back(_Tp* elems, size_t count); + + //! copies the whole sequence or the sequence slice to the specified vector + void copyTo(std::vector<_Tp>& vec, const Range& range=Range::all()) const; + //! returns the vector containing all the sequence elements + operator std::vector<_Tp>() const; + + CvSeq* seq; +}; + + +/*! + STL-style Sequence Iterator inherited from the CvSeqReader structure +*/ +template class SeqIterator : public CvSeqReader +{ +public: + //! the default constructor + SeqIterator(); + //! the constructor setting the iterator to the beginning or to the end of the sequence + SeqIterator(const Seq<_Tp>& seq, bool seekEnd=false); + //! positions the iterator within the sequence + void seek(size_t pos); + //! reports the current iterator position + size_t tell() const; + //! returns reference to the current sequence element + _Tp& operator *(); + //! returns read-only reference to the current sequence element + const _Tp& operator *() const; + //! moves iterator to the next sequence element + SeqIterator& operator ++(); + //! moves iterator to the next sequence element + SeqIterator operator ++(int) const; + //! moves iterator to the previous sequence element + SeqIterator& operator --(); + //! moves iterator to the previous sequence element + SeqIterator operator --(int) const; + + //! moves iterator forward by the specified offset (possibly negative) + SeqIterator& operator +=(int); + //! moves iterator backward by the specified offset (possibly negative) + SeqIterator& operator -=(int); + + // this is index of the current element module seq->total*2 + // (to distinguish between 0 and seq->total) + int index; +}; + + + +// bridge C++ => C Seq API +CV_EXPORTS schar* seqPush( CvSeq* seq, const void* element=0); +CV_EXPORTS schar* seqPushFront( CvSeq* seq, const void* element=0); +CV_EXPORTS void seqPop( CvSeq* seq, void* element=0); +CV_EXPORTS void seqPopFront( CvSeq* seq, void* element=0); +CV_EXPORTS void seqPopMulti( CvSeq* seq, void* elements, + int count, int in_front=0 ); +CV_EXPORTS void seqRemove( CvSeq* seq, int index ); +CV_EXPORTS void clearSeq( CvSeq* seq ); +CV_EXPORTS schar* getSeqElem( const CvSeq* seq, int index ); +CV_EXPORTS void seqRemoveSlice( CvSeq* seq, CvSlice slice ); +CV_EXPORTS void seqInsertSlice( CvSeq* seq, int before_index, const CvArr* from_arr ); + +template inline Seq<_Tp>::Seq() : seq(0) {} +template inline Seq<_Tp>::Seq( const CvSeq* _seq ) : seq((CvSeq*)_seq) +{ + CV_Assert(!_seq || _seq->elem_size == sizeof(_Tp)); +} + +template inline Seq<_Tp>::Seq( MemStorage& storage, + int headerSize ) +{ + CV_Assert(headerSize >= (int)sizeof(CvSeq)); + seq = cvCreateSeq(DataType<_Tp>::type, headerSize, sizeof(_Tp), storage); +} + +template inline _Tp& Seq<_Tp>::operator [](int idx) +{ return *(_Tp*)getSeqElem(seq, idx); } + +template inline const _Tp& Seq<_Tp>::operator [](int idx) const +{ return *(_Tp*)getSeqElem(seq, idx); } + +template inline SeqIterator<_Tp> Seq<_Tp>::begin() const +{ return SeqIterator<_Tp>(*this); } + +template inline SeqIterator<_Tp> Seq<_Tp>::end() const +{ return SeqIterator<_Tp>(*this, true); } + +template inline size_t Seq<_Tp>::size() const +{ return seq ? seq->total : 0; } + +template inline int Seq<_Tp>::type() const +{ return seq ? CV_MAT_TYPE(seq->flags) : 0; } + +template inline int Seq<_Tp>::depth() const +{ return seq ? CV_MAT_DEPTH(seq->flags) : 0; } + +template inline int Seq<_Tp>::channels() const +{ return seq ? CV_MAT_CN(seq->flags) : 0; } + +template inline size_t Seq<_Tp>::elemSize() const +{ return seq ? seq->elem_size : 0; } + +template inline size_t Seq<_Tp>::index(const _Tp& elem) const +{ return cvSeqElemIdx(seq, &elem); } + +template inline void Seq<_Tp>::push_back(const _Tp& elem) +{ cvSeqPush(seq, &elem); } + +template inline void Seq<_Tp>::push_front(const _Tp& elem) +{ cvSeqPushFront(seq, &elem); } + +template inline void Seq<_Tp>::push_back(const _Tp* elem, size_t count) +{ cvSeqPushMulti(seq, elem, (int)count, 0); } + +template inline void Seq<_Tp>::push_front(const _Tp* elem, size_t count) +{ cvSeqPushMulti(seq, elem, (int)count, 1); } + +template inline _Tp& Seq<_Tp>::back() +{ return *(_Tp*)getSeqElem(seq, -1); } + +template inline const _Tp& Seq<_Tp>::back() const +{ return *(const _Tp*)getSeqElem(seq, -1); } + +template inline _Tp& Seq<_Tp>::front() +{ return *(_Tp*)getSeqElem(seq, 0); } + +template inline const _Tp& Seq<_Tp>::front() const +{ return *(const _Tp*)getSeqElem(seq, 0); } + +template inline bool Seq<_Tp>::empty() const +{ return !seq || seq->total == 0; } + +template inline void Seq<_Tp>::clear() +{ if(seq) clearSeq(seq); } + +template inline void Seq<_Tp>::pop_back() +{ seqPop(seq); } + +template inline void Seq<_Tp>::pop_front() +{ seqPopFront(seq); } + +template inline void Seq<_Tp>::pop_back(_Tp* elem, size_t count) +{ seqPopMulti(seq, elem, (int)count, 0); } + +template inline void Seq<_Tp>::pop_front(_Tp* elem, size_t count) +{ seqPopMulti(seq, elem, (int)count, 1); } + +template inline void Seq<_Tp>::insert(int idx, const _Tp& elem) +{ seqInsert(seq, idx, &elem); } + +template inline void Seq<_Tp>::insert(int idx, const _Tp* elems, size_t count) +{ + CvMat m = cvMat(1, count, DataType<_Tp>::type, elems); + seqInsertSlice(seq, idx, &m); +} + +template inline void Seq<_Tp>::remove(int idx) +{ seqRemove(seq, idx); } + +template inline void Seq<_Tp>::remove(const Range& r) +{ seqRemoveSlice(seq, cvSlice(r.start, r.end)); } + +template inline void Seq<_Tp>::copyTo(std::vector<_Tp>& vec, const Range& range) const +{ + size_t len = !seq ? 0 : range == Range::all() ? seq->total : range.end - range.start; + vec.resize(len); + if( seq && len ) + cvCvtSeqToArray(seq, &vec[0], cvSlice(range)); +} + +template inline Seq<_Tp>::operator std::vector<_Tp>() const +{ + std::vector<_Tp> vec; + copyTo(vec); + return vec; +} + +template inline SeqIterator<_Tp>::SeqIterator() +{ memset(this, 0, sizeof(*this)); } + +template inline SeqIterator<_Tp>::SeqIterator(const Seq<_Tp>& _seq, bool seekEnd) +{ + cvStartReadSeq(_seq.seq, this); + index = seekEnd ? _seq.seq->total : 0; +} + +template inline void SeqIterator<_Tp>::seek(size_t pos) +{ + cvSetSeqReaderPos(this, (int)pos, false); + index = pos; +} + +template inline size_t SeqIterator<_Tp>::tell() const +{ return index; } + +template inline _Tp& SeqIterator<_Tp>::operator *() +{ return *(_Tp*)ptr; } + +template inline const _Tp& SeqIterator<_Tp>::operator *() const +{ return *(const _Tp*)ptr; } + +template inline SeqIterator<_Tp>& SeqIterator<_Tp>::operator ++() +{ + CV_NEXT_SEQ_ELEM(sizeof(_Tp), *this); + if( ++index >= seq->total*2 ) + index = 0; + return *this; +} + +template inline SeqIterator<_Tp> SeqIterator<_Tp>::operator ++(int) const +{ + SeqIterator<_Tp> it = *this; + ++*this; + return it; +} + +template inline SeqIterator<_Tp>& SeqIterator<_Tp>::operator --() +{ + CV_PREV_SEQ_ELEM(sizeof(_Tp), *this); + if( --index < 0 ) + index = seq->total*2-1; + return *this; +} + +template inline SeqIterator<_Tp> SeqIterator<_Tp>::operator --(int) const +{ + SeqIterator<_Tp> it = *this; + --*this; + return it; +} + +template inline SeqIterator<_Tp>& SeqIterator<_Tp>::operator +=(int delta) +{ + cvSetSeqReaderPos(this, delta, 1); + index += delta; + int n = seq->total*2; + if( index < 0 ) + index += n; + if( index >= n ) + index -= n; + return *this; +} + +template inline SeqIterator<_Tp>& SeqIterator<_Tp>::operator -=(int delta) +{ + return (*this += -delta); +} + +template inline ptrdiff_t operator - (const SeqIterator<_Tp>& a, + const SeqIterator<_Tp>& b) +{ + ptrdiff_t delta = a.index - b.index, n = a.seq->total; + if( delta > n || delta < -n ) + delta += delta < 0 ? n : -n; + return delta; +} + +template inline bool operator == (const SeqIterator<_Tp>& a, + const SeqIterator<_Tp>& b) +{ + return a.seq == b.seq && a.index == b.index; +} + +template inline bool operator != (const SeqIterator<_Tp>& a, + const SeqIterator<_Tp>& b) +{ + return !(a == b); +} + +//! @} + +} // cv + +#endif + +#endif diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/cuda.hpp b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/cuda.hpp new file mode 100644 index 0000000..5fa0968 --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/cuda.hpp @@ -0,0 +1,1055 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Copyright (C) 2013, OpenCV Foundation, all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef OPENCV_CORE_CUDA_HPP +#define OPENCV_CORE_CUDA_HPP + +#ifndef __cplusplus +# error cuda.hpp header must be compiled as C++ +#endif + +#include "opencv2/core.hpp" +#include "opencv2/core/cuda_types.hpp" + +/** + @defgroup cuda CUDA-accelerated Computer Vision + @{ + @defgroup cudacore Core part + @{ + @defgroup cudacore_init Initialization and Information + @defgroup cudacore_struct Data Structures + @} + @} + */ + +namespace cv { namespace cuda { + +//! @addtogroup cudacore_struct +//! @{ + +//=================================================================================== +// GpuMat +//=================================================================================== + +/** @brief Base storage class for GPU memory with reference counting. + +Its interface matches the Mat interface with the following limitations: + +- no arbitrary dimensions support (only 2D) +- no functions that return references to their data (because references on GPU are not valid for + CPU) +- no expression templates technique support + +Beware that the latter limitation may lead to overloaded matrix operators that cause memory +allocations. The GpuMat class is convertible to cuda::PtrStepSz and cuda::PtrStep so it can be +passed directly to the kernel. + +@note In contrast with Mat, in most cases GpuMat::isContinuous() == false . This means that rows are +aligned to a size depending on the hardware. Single-row GpuMat is always a continuous matrix. + +@note You are not recommended to leave static or global GpuMat variables allocated, that is, to rely +on its destructor. The destruction order of such variables and CUDA context is undefined. GPU memory +release function returns error if the CUDA context has been destroyed before. + +Some member functions are described as a "Blocking Call" while some are described as a +"Non-Blocking Call". Blocking functions are synchronous to host. It is guaranteed that the GPU +operation is finished when the function returns. However, non-blocking functions are asynchronous to +host. Those functions may return even if the GPU operation is not finished. + +Compared to their blocking counterpart, non-blocking functions accept Stream as an additional +argument. If a non-default stream is passed, the GPU operation may overlap with operations in other +streams. + +@sa Mat + */ +class CV_EXPORTS_W GpuMat +{ +public: + class CV_EXPORTS_W Allocator + { + public: + virtual ~Allocator() {} + + // allocator must fill data, step and refcount fields + virtual bool allocate(GpuMat* mat, int rows, int cols, size_t elemSize) = 0; + virtual void free(GpuMat* mat) = 0; + }; + + //! default allocator + CV_WRAP static GpuMat::Allocator* defaultAllocator(); + CV_WRAP static void setDefaultAllocator(GpuMat::Allocator* allocator); + + //! default constructor + CV_WRAP explicit GpuMat(GpuMat::Allocator* allocator = GpuMat::defaultAllocator()); + + //! constructs GpuMat of the specified size and type + CV_WRAP GpuMat(int rows, int cols, int type, GpuMat::Allocator* allocator = GpuMat::defaultAllocator()); + CV_WRAP GpuMat(Size size, int type, GpuMat::Allocator* allocator = GpuMat::defaultAllocator()); + + //! constructs GpuMat and fills it with the specified value _s + CV_WRAP GpuMat(int rows, int cols, int type, Scalar s, GpuMat::Allocator* allocator = GpuMat::defaultAllocator()); + CV_WRAP GpuMat(Size size, int type, Scalar s, GpuMat::Allocator* allocator = GpuMat::defaultAllocator()); + + //! copy constructor + CV_WRAP GpuMat(const GpuMat& m); + + //! constructor for GpuMat headers pointing to user-allocated data + GpuMat(int rows, int cols, int type, void* data, size_t step = Mat::AUTO_STEP); + GpuMat(Size size, int type, void* data, size_t step = Mat::AUTO_STEP); + + //! creates a GpuMat header for a part of the bigger matrix + CV_WRAP GpuMat(const GpuMat& m, Range rowRange, Range colRange); + CV_WRAP GpuMat(const GpuMat& m, Rect roi); + + //! builds GpuMat from host memory (Blocking call) + CV_WRAP explicit GpuMat(InputArray arr, GpuMat::Allocator* allocator = GpuMat::defaultAllocator()); + + //! destructor - calls release() + ~GpuMat(); + + //! assignment operators + GpuMat& operator =(const GpuMat& m); + + //! allocates new GpuMat data unless the GpuMat already has specified size and type + CV_WRAP void create(int rows, int cols, int type); + CV_WRAP void create(Size size, int type); + + //! decreases reference counter, deallocate the data when reference counter reaches 0 + void release(); + + //! swaps with other smart pointer + CV_WRAP void swap(GpuMat& mat); + + /** @brief Performs data upload to GpuMat (Blocking call) + + This function copies data from host memory to device memory. As being a blocking call, it is + guaranteed that the copy operation is finished when this function returns. + */ + CV_WRAP void upload(InputArray arr); + + /** @brief Performs data upload to GpuMat (Non-Blocking call) + + This function copies data from host memory to device memory. As being a non-blocking call, this + function may return even if the copy operation is not finished. + + The copy operation may be overlapped with operations in other non-default streams if \p stream is + not the default stream and \p dst is HostMem allocated with HostMem::PAGE_LOCKED option. + */ + CV_WRAP void upload(InputArray arr, Stream& stream); + + /** @brief Performs data download from GpuMat (Blocking call) + + This function copies data from device memory to host memory. As being a blocking call, it is + guaranteed that the copy operation is finished when this function returns. + */ + CV_WRAP void download(OutputArray dst) const; + + /** @brief Performs data download from GpuMat (Non-Blocking call) + + This function copies data from device memory to host memory. As being a non-blocking call, this + function may return even if the copy operation is not finished. + + The copy operation may be overlapped with operations in other non-default streams if \p stream is + not the default stream and \p dst is HostMem allocated with HostMem::PAGE_LOCKED option. + */ + CV_WRAP void download(OutputArray dst, Stream& stream) const; + + //! returns deep copy of the GpuMat, i.e. the data is copied + CV_WRAP GpuMat clone() const; + + //! copies the GpuMat content to device memory (Blocking call) + CV_WRAP void copyTo(OutputArray dst) const; + + //! copies the GpuMat content to device memory (Non-Blocking call) + CV_WRAP void copyTo(OutputArray dst, Stream& stream) const; + + //! copies those GpuMat elements to "m" that are marked with non-zero mask elements (Blocking call) + CV_WRAP void copyTo(OutputArray dst, InputArray mask) const; + + //! copies those GpuMat elements to "m" that are marked with non-zero mask elements (Non-Blocking call) + CV_WRAP void copyTo(OutputArray dst, InputArray mask, Stream& stream) const; + + //! sets some of the GpuMat elements to s (Blocking call) + CV_WRAP GpuMat& setTo(Scalar s); + + //! sets some of the GpuMat elements to s (Non-Blocking call) + CV_WRAP GpuMat& setTo(Scalar s, Stream& stream); + + //! sets some of the GpuMat elements to s, according to the mask (Blocking call) + CV_WRAP GpuMat& setTo(Scalar s, InputArray mask); + + //! sets some of the GpuMat elements to s, according to the mask (Non-Blocking call) + CV_WRAP GpuMat& setTo(Scalar s, InputArray mask, Stream& stream); + + //! converts GpuMat to another datatype (Blocking call) + CV_WRAP void convertTo(OutputArray dst, int rtype) const; + + //! converts GpuMat to another datatype (Non-Blocking call) + CV_WRAP void convertTo(OutputArray dst, int rtype, Stream& stream) const; + + //! converts GpuMat to another datatype with scaling (Blocking call) + CV_WRAP void convertTo(OutputArray dst, int rtype, double alpha, double beta = 0.0) const; + + //! converts GpuMat to another datatype with scaling (Non-Blocking call) + CV_WRAP void convertTo(OutputArray dst, int rtype, double alpha, Stream& stream) const; + + //! converts GpuMat to another datatype with scaling (Non-Blocking call) + CV_WRAP void convertTo(OutputArray dst, int rtype, double alpha, double beta, Stream& stream) const; + + CV_WRAP void assignTo(GpuMat& m, int type = -1) const; + + //! returns pointer to y-th row + uchar* ptr(int y = 0); + const uchar* ptr(int y = 0) const; + + //! template version of the above method + template _Tp* ptr(int y = 0); + template const _Tp* ptr(int y = 0) const; + + template operator PtrStepSz<_Tp>() const; + template operator PtrStep<_Tp>() const; + + //! returns a new GpuMat header for the specified row + CV_WRAP GpuMat row(int y) const; + + //! returns a new GpuMat header for the specified column + CV_WRAP GpuMat col(int x) const; + + //! ... for the specified row span + CV_WRAP GpuMat rowRange(int startrow, int endrow) const; + CV_WRAP GpuMat rowRange(Range r) const; + + //! ... for the specified column span + CV_WRAP GpuMat colRange(int startcol, int endcol) const; + CV_WRAP GpuMat colRange(Range r) const; + + //! extracts a rectangular sub-GpuMat (this is a generalized form of row, rowRange etc.) + GpuMat operator ()(Range rowRange, Range colRange) const; + GpuMat operator ()(Rect roi) const; + + //! creates alternative GpuMat header for the same data, with different + //! number of channels and/or different number of rows + CV_WRAP GpuMat reshape(int cn, int rows = 0) const; + + //! locates GpuMat header within a parent GpuMat + CV_WRAP void locateROI(Size& wholeSize, Point& ofs) const; + + //! moves/resizes the current GpuMat ROI inside the parent GpuMat + CV_WRAP GpuMat& adjustROI(int dtop, int dbottom, int dleft, int dright); + + //! returns true iff the GpuMat data is continuous + //! (i.e. when there are no gaps between successive rows) + CV_WRAP bool isContinuous() const; + + //! returns element size in bytes + CV_WRAP size_t elemSize() const; + + //! returns the size of element channel in bytes + CV_WRAP size_t elemSize1() const; + + //! returns element type + CV_WRAP int type() const; + + //! returns element type + CV_WRAP int depth() const; + + //! returns number of channels + CV_WRAP int channels() const; + + //! returns step/elemSize1() + CV_WRAP size_t step1() const; + + //! returns GpuMat size : width == number of columns, height == number of rows + CV_WRAP Size size() const; + + //! returns true if GpuMat data is NULL + CV_WRAP bool empty() const; + + // returns pointer to cuda memory + CV_WRAP void* cudaPtr() const; + + //! internal use method: updates the continuity flag + CV_WRAP void updateContinuityFlag(); + + /*! includes several bit-fields: + - the magic signature + - continuity flag + - depth + - number of channels + */ + int flags; + + //! the number of rows and columns + int rows, cols; + + //! a distance between successive rows in bytes; includes the gap if any + CV_PROP size_t step; + + //! pointer to the data + uchar* data; + + //! pointer to the reference counter; + //! when GpuMat points to user-allocated data, the pointer is NULL + int* refcount; + + //! helper fields used in locateROI and adjustROI + uchar* datastart; + const uchar* dataend; + + //! allocator + Allocator* allocator; +}; + +/** @brief Creates a continuous matrix. + +@param rows Row count. +@param cols Column count. +@param type Type of the matrix. +@param arr Destination matrix. This parameter changes only if it has a proper type and area ( +\f$\texttt{rows} \times \texttt{cols}\f$ ). + +Matrix is called continuous if its elements are stored continuously, that is, without gaps at the +end of each row. + */ +CV_EXPORTS_W void createContinuous(int rows, int cols, int type, OutputArray arr); + +/** @brief Ensures that the size of a matrix is big enough and the matrix has a proper type. + +@param rows Minimum desired number of rows. +@param cols Minimum desired number of columns. +@param type Desired matrix type. +@param arr Destination matrix. + +The function does not reallocate memory if the matrix has proper attributes already. + */ +CV_EXPORTS_W void ensureSizeIsEnough(int rows, int cols, int type, OutputArray arr); + +/** @brief BufferPool for use with CUDA streams + +BufferPool utilizes Stream's allocator to create new buffers for GpuMat's. It is +only useful when enabled with #setBufferPoolUsage. + +@code + setBufferPoolUsage(true); +@endcode + +@note #setBufferPoolUsage must be called \em before any Stream declaration. + +Users may specify custom allocator for Stream and may implement their own stream based +functions utilizing the same underlying GPU memory management. + +If custom allocator is not specified, BufferPool utilizes StackAllocator by +default. StackAllocator allocates a chunk of GPU device memory beforehand, +and when GpuMat is declared later on, it is given the pre-allocated memory. +This kind of strategy reduces the number of calls for memory allocating APIs +such as cudaMalloc or cudaMallocPitch. + +Below is an example that utilizes BufferPool with StackAllocator: + +@code + #include + + using namespace cv; + using namespace cv::cuda + + int main() + { + setBufferPoolUsage(true); // Tell OpenCV that we are going to utilize BufferPool + setBufferPoolConfig(getDevice(), 1024 * 1024 * 64, 2); // Allocate 64 MB, 2 stacks (default is 10 MB, 5 stacks) + + Stream stream1, stream2; // Each stream uses 1 stack + BufferPool pool1(stream1), pool2(stream2); + + GpuMat d_src1 = pool1.getBuffer(4096, 4096, CV_8UC1); // 16MB + GpuMat d_dst1 = pool1.getBuffer(4096, 4096, CV_8UC3); // 48MB, pool1 is now full + + GpuMat d_src2 = pool2.getBuffer(1024, 1024, CV_8UC1); // 1MB + GpuMat d_dst2 = pool2.getBuffer(1024, 1024, CV_8UC3); // 3MB + + cvtColor(d_src1, d_dst1, CV_GRAY2BGR, 0, stream1); + cvtColor(d_src2, d_dst2, CV_GRAY2BGR, 0, stream2); + } +@endcode + +If we allocate another GpuMat on pool1 in the above example, it will be carried out by +the DefaultAllocator since the stack for pool1 is full. + +@code + GpuMat d_add1 = pool1.getBuffer(1024, 1024, CV_8UC1); // Stack for pool1 is full, memory is allocated with DefaultAllocator +@endcode + +If a third stream is declared in the above example, allocating with #getBuffer +within that stream will also be carried out by the DefaultAllocator because we've run out of +stacks. + +@code + Stream stream3; // Only 2 stacks were allocated, we've run out of stacks + BufferPool pool3(stream3); + GpuMat d_src3 = pool3.getBuffer(1024, 1024, CV_8UC1); // Memory is allocated with DefaultAllocator +@endcode + +@warning When utilizing StackAllocator, deallocation order is important. + +Just like a stack, deallocation must be done in LIFO order. Below is an example of +erroneous usage that violates LIFO rule. If OpenCV is compiled in Debug mode, this +sample code will emit CV_Assert error. + +@code + int main() + { + setBufferPoolUsage(true); // Tell OpenCV that we are going to utilize BufferPool + Stream stream; // A default size (10 MB) stack is allocated to this stream + BufferPool pool(stream); + + GpuMat mat1 = pool.getBuffer(1024, 1024, CV_8UC1); // Allocate mat1 (1MB) + GpuMat mat2 = pool.getBuffer(1024, 1024, CV_8UC1); // Allocate mat2 (1MB) + + mat1.release(); // erroneous usage : mat2 must be deallocated before mat1 + } +@endcode + +Since C++ local variables are destroyed in the reverse order of construction, +the code sample below satisfies the LIFO rule. Local GpuMat's are deallocated +and the corresponding memory is automatically returned to the pool for later usage. + +@code + int main() + { + setBufferPoolUsage(true); // Tell OpenCV that we are going to utilize BufferPool + setBufferPoolConfig(getDevice(), 1024 * 1024 * 64, 2); // Allocate 64 MB, 2 stacks (default is 10 MB, 5 stacks) + + Stream stream1, stream2; // Each stream uses 1 stack + BufferPool pool1(stream1), pool2(stream2); + + for (int i = 0; i < 10; i++) + { + GpuMat d_src1 = pool1.getBuffer(4096, 4096, CV_8UC1); // 16MB + GpuMat d_dst1 = pool1.getBuffer(4096, 4096, CV_8UC3); // 48MB, pool1 is now full + + GpuMat d_src2 = pool2.getBuffer(1024, 1024, CV_8UC1); // 1MB + GpuMat d_dst2 = pool2.getBuffer(1024, 1024, CV_8UC3); // 3MB + + d_src1.setTo(Scalar(i), stream1); + d_src2.setTo(Scalar(i), stream2); + + cvtColor(d_src1, d_dst1, CV_GRAY2BGR, 0, stream1); + cvtColor(d_src2, d_dst2, CV_GRAY2BGR, 0, stream2); + // The order of destruction of the local variables is: + // d_dst2 => d_src2 => d_dst1 => d_src1 + // LIFO rule is satisfied, this code runs without error + } + } +@endcode + */ +class CV_EXPORTS_W BufferPool +{ +public: + + //! Gets the BufferPool for the given stream. + explicit BufferPool(Stream& stream); + + //! Allocates a new GpuMat of given size and type. + CV_WRAP GpuMat getBuffer(int rows, int cols, int type); + + //! Allocates a new GpuMat of given size and type. + CV_WRAP GpuMat getBuffer(Size size, int type) { return getBuffer(size.height, size.width, type); } + + //! Returns the allocator associated with the stream. + CV_WRAP Ptr getAllocator() const { return allocator_; } + +private: + Ptr allocator_; +}; + +//! BufferPool management (must be called before Stream creation) +CV_EXPORTS_W void setBufferPoolUsage(bool on); +CV_EXPORTS_W void setBufferPoolConfig(int deviceId, size_t stackSize, int stackCount); + +//=================================================================================== +// HostMem +//=================================================================================== + +/** @brief Class with reference counting wrapping special memory type allocation functions from CUDA. + +Its interface is also Mat-like but with additional memory type parameters. + +- **PAGE_LOCKED** sets a page locked memory type used commonly for fast and asynchronous + uploading/downloading data from/to GPU. +- **SHARED** specifies a zero copy memory allocation that enables mapping the host memory to GPU + address space, if supported. +- **WRITE_COMBINED** sets the write combined buffer that is not cached by CPU. Such buffers are + used to supply GPU with data when GPU only reads it. The advantage is a better CPU cache + utilization. + +@note Allocation size of such memory types is usually limited. For more details, see *CUDA 2.2 +Pinned Memory APIs* document or *CUDA C Programming Guide*. + */ +class CV_EXPORTS_W HostMem +{ +public: + enum AllocType { PAGE_LOCKED = 1, SHARED = 2, WRITE_COMBINED = 4 }; + + static MatAllocator* getAllocator(HostMem::AllocType alloc_type = HostMem::AllocType::PAGE_LOCKED); + + CV_WRAP explicit HostMem(HostMem::AllocType alloc_type = HostMem::AllocType::PAGE_LOCKED); + + HostMem(const HostMem& m); + + CV_WRAP HostMem(int rows, int cols, int type, HostMem::AllocType alloc_type = HostMem::AllocType::PAGE_LOCKED); + CV_WRAP HostMem(Size size, int type, HostMem::AllocType alloc_type = HostMem::AllocType::PAGE_LOCKED); + + //! creates from host memory with coping data + CV_WRAP explicit HostMem(InputArray arr, HostMem::AllocType alloc_type = HostMem::AllocType::PAGE_LOCKED); + + ~HostMem(); + + HostMem& operator =(const HostMem& m); + + //! swaps with other smart pointer + CV_WRAP void swap(HostMem& b); + + //! returns deep copy of the matrix, i.e. the data is copied + CV_WRAP HostMem clone() const; + + //! allocates new matrix data unless the matrix already has specified size and type. + CV_WRAP void create(int rows, int cols, int type); + void create(Size size, int type); + + //! creates alternative HostMem header for the same data, with different + //! number of channels and/or different number of rows + CV_WRAP HostMem reshape(int cn, int rows = 0) const; + + //! decrements reference counter and released memory if needed. + void release(); + + //! returns matrix header with disabled reference counting for HostMem data. + CV_WRAP Mat createMatHeader() const; + + /** @brief Maps CPU memory to GPU address space and creates the cuda::GpuMat header without reference counting + for it. + + This can be done only if memory was allocated with the SHARED flag and if it is supported by the + hardware. Laptops often share video and CPU memory, so address spaces can be mapped, which + eliminates an extra copy. + */ + GpuMat createGpuMatHeader() const; + + // Please see cv::Mat for descriptions + CV_WRAP bool isContinuous() const; + CV_WRAP size_t elemSize() const; + CV_WRAP size_t elemSize1() const; + CV_WRAP int type() const; + CV_WRAP int depth() const; + CV_WRAP int channels() const; + CV_WRAP size_t step1() const; + CV_WRAP Size size() const; + CV_WRAP bool empty() const; + + // Please see cv::Mat for descriptions + int flags; + int rows, cols; + CV_PROP size_t step; + + uchar* data; + int* refcount; + + uchar* datastart; + const uchar* dataend; + + AllocType alloc_type; +}; + +/** @brief Page-locks the memory of matrix and maps it for the device(s). + +@param m Input matrix. + */ +CV_EXPORTS_W void registerPageLocked(Mat& m); + +/** @brief Unmaps the memory of matrix and makes it pageable again. + +@param m Input matrix. + */ +CV_EXPORTS_W void unregisterPageLocked(Mat& m); + +//=================================================================================== +// Stream +//=================================================================================== + +/** @brief This class encapsulates a queue of asynchronous calls. + +@note Currently, you may face problems if an operation is enqueued twice with different data. Some +functions use the constant GPU memory, and next call may update the memory before the previous one +has been finished. But calling different operations asynchronously is safe because each operation +has its own constant buffer. Memory copy/upload/download/set operations to the buffers you hold are +also safe. + +@note The Stream class is not thread-safe. Please use different Stream objects for different CPU threads. + +@code +void thread1() +{ + cv::cuda::Stream stream1; + cv::cuda::func1(..., stream1); +} + +void thread2() +{ + cv::cuda::Stream stream2; + cv::cuda::func2(..., stream2); +} +@endcode + +@note By default all CUDA routines are launched in Stream::Null() object, if the stream is not specified by user. +In multi-threading environment the stream objects must be passed explicitly (see previous note). + */ +class CV_EXPORTS_W Stream +{ + typedef void (Stream::*bool_type)() const; + void this_type_does_not_support_comparisons() const {} + +public: + typedef void (*StreamCallback)(int status, void* userData); + + //! creates a new asynchronous stream + CV_WRAP Stream(); + + //! creates a new asynchronous stream with custom allocator + CV_WRAP Stream(const Ptr& allocator); + + /** @brief Returns true if the current stream queue is finished. Otherwise, it returns false. + */ + CV_WRAP bool queryIfComplete() const; + + /** @brief Blocks the current CPU thread until all operations in the stream are complete. + */ + CV_WRAP void waitForCompletion(); + + /** @brief Makes a compute stream wait on an event. + */ + CV_WRAP void waitEvent(const Event& event); + + /** @brief Adds a callback to be called on the host after all currently enqueued items in the stream have + completed. + + @note Callbacks must not make any CUDA API calls. Callbacks must not perform any synchronization + that may depend on outstanding device work or other callbacks that are not mandated to run earlier. + Callbacks without a mandated order (in independent streams) execute in undefined order and may be + serialized. + */ + void enqueueHostCallback(StreamCallback callback, void* userData); + + //! return Stream object for default CUDA stream + CV_WRAP static Stream& Null(); + + //! returns true if stream object is not default (!= 0) + operator bool_type() const; + + //! return Pointer to CUDA stream + CV_WRAP void* cudaPtr() const; + + class Impl; + +private: + Ptr impl_; + Stream(const Ptr& impl); + + friend struct StreamAccessor; + friend class BufferPool; + friend class DefaultDeviceInitializer; +}; + +class CV_EXPORTS_W Event +{ +public: + enum CreateFlags + { + DEFAULT = 0x00, /**< Default event flag */ + BLOCKING_SYNC = 0x01, /**< Event uses blocking synchronization */ + DISABLE_TIMING = 0x02, /**< Event will not record timing data */ + INTERPROCESS = 0x04 /**< Event is suitable for interprocess use. DisableTiming must be set */ + }; + + CV_WRAP explicit Event(Event::CreateFlags flags = Event::CreateFlags::DEFAULT); + + //! records an event + CV_WRAP void record(Stream& stream = Stream::Null()); + + //! queries an event's status + CV_WRAP bool queryIfComplete() const; + + //! waits for an event to complete + CV_WRAP void waitForCompletion(); + + //! computes the elapsed time between events + CV_WRAP static float elapsedTime(const Event& start, const Event& end); + + class Impl; + +private: + Ptr impl_; + Event(const Ptr& impl); + + friend struct EventAccessor; +}; + +//! @} cudacore_struct + +//=================================================================================== +// Initialization & Info +//=================================================================================== + +//! @addtogroup cudacore_init +//! @{ + +/** @brief Returns the number of installed CUDA-enabled devices. + +Use this function before any other CUDA functions calls. If OpenCV is compiled without CUDA support, +this function returns 0. If the CUDA driver is not installed, or is incompatible, this function +returns -1. + */ +CV_EXPORTS_W int getCudaEnabledDeviceCount(); + +/** @brief Sets a device and initializes it for the current thread. + +@param device System index of a CUDA device starting with 0. + +If the call of this function is omitted, a default device is initialized at the fist CUDA usage. + */ +CV_EXPORTS_W void setDevice(int device); + +/** @brief Returns the current device index set by cuda::setDevice or initialized by default. + */ +CV_EXPORTS_W int getDevice(); + +/** @brief Explicitly destroys and cleans up all resources associated with the current device in the current +process. + +Any subsequent API call to this device will reinitialize the device. + */ +CV_EXPORTS_W void resetDevice(); + +/** @brief Enumeration providing CUDA computing features. + */ +enum FeatureSet +{ + FEATURE_SET_COMPUTE_10 = 10, + FEATURE_SET_COMPUTE_11 = 11, + FEATURE_SET_COMPUTE_12 = 12, + FEATURE_SET_COMPUTE_13 = 13, + FEATURE_SET_COMPUTE_20 = 20, + FEATURE_SET_COMPUTE_21 = 21, + FEATURE_SET_COMPUTE_30 = 30, + FEATURE_SET_COMPUTE_32 = 32, + FEATURE_SET_COMPUTE_35 = 35, + FEATURE_SET_COMPUTE_50 = 50, + + GLOBAL_ATOMICS = FEATURE_SET_COMPUTE_11, + SHARED_ATOMICS = FEATURE_SET_COMPUTE_12, + NATIVE_DOUBLE = FEATURE_SET_COMPUTE_13, + WARP_SHUFFLE_FUNCTIONS = FEATURE_SET_COMPUTE_30, + DYNAMIC_PARALLELISM = FEATURE_SET_COMPUTE_35 +}; + +//! checks whether current device supports the given feature +CV_EXPORTS bool deviceSupports(FeatureSet feature_set); + +/** @brief Class providing a set of static methods to check what NVIDIA\* card architecture the CUDA module was +built for. + +According to the CUDA C Programming Guide Version 3.2: "PTX code produced for some specific compute +capability can always be compiled to binary code of greater or equal compute capability". + */ +class CV_EXPORTS_W TargetArchs +{ +public: + /** @brief The following method checks whether the module was built with the support of the given feature: + + @param feature_set Features to be checked. See :ocvcuda::FeatureSet. + */ + static bool builtWith(FeatureSet feature_set); + + /** @brief There is a set of methods to check whether the module contains intermediate (PTX) or binary CUDA + code for the given architecture(s): + + @param major Major compute capability version. + @param minor Minor compute capability version. + */ + CV_WRAP static bool has(int major, int minor); + CV_WRAP static bool hasPtx(int major, int minor); + CV_WRAP static bool hasBin(int major, int minor); + + CV_WRAP static bool hasEqualOrLessPtx(int major, int minor); + CV_WRAP static bool hasEqualOrGreater(int major, int minor); + CV_WRAP static bool hasEqualOrGreaterPtx(int major, int minor); + CV_WRAP static bool hasEqualOrGreaterBin(int major, int minor); +}; + +/** @brief Class providing functionality for querying the specified GPU properties. + */ +class CV_EXPORTS_W DeviceInfo +{ +public: + //! creates DeviceInfo object for the current GPU + CV_WRAP DeviceInfo(); + + /** @brief The constructors. + + @param device_id System index of the CUDA device starting with 0. + + Constructs the DeviceInfo object for the specified device. If device_id parameter is missed, it + constructs an object for the current device. + */ + CV_WRAP DeviceInfo(int device_id); + + /** @brief Returns system index of the CUDA device starting with 0. + */ + CV_WRAP int deviceID() const; + + //! ASCII string identifying device + const char* name() const; + + //! global memory available on device in bytes + CV_WRAP size_t totalGlobalMem() const; + + //! shared memory available per block in bytes + CV_WRAP size_t sharedMemPerBlock() const; + + //! 32-bit registers available per block + CV_WRAP int regsPerBlock() const; + + //! warp size in threads + CV_WRAP int warpSize() const; + + //! maximum pitch in bytes allowed by memory copies + CV_WRAP size_t memPitch() const; + + //! maximum number of threads per block + CV_WRAP int maxThreadsPerBlock() const; + + //! maximum size of each dimension of a block + CV_WRAP Vec3i maxThreadsDim() const; + + //! maximum size of each dimension of a grid + CV_WRAP Vec3i maxGridSize() const; + + //! clock frequency in kilohertz + CV_WRAP int clockRate() const; + + //! constant memory available on device in bytes + CV_WRAP size_t totalConstMem() const; + + //! major compute capability + CV_WRAP int majorVersion() const; + + //! minor compute capability + CV_WRAP int minorVersion() const; + + //! alignment requirement for textures + CV_WRAP size_t textureAlignment() const; + + //! pitch alignment requirement for texture references bound to pitched memory + CV_WRAP size_t texturePitchAlignment() const; + + //! number of multiprocessors on device + CV_WRAP int multiProcessorCount() const; + + //! specified whether there is a run time limit on kernels + CV_WRAP bool kernelExecTimeoutEnabled() const; + + //! device is integrated as opposed to discrete + CV_WRAP bool integrated() const; + + //! device can map host memory with cudaHostAlloc/cudaHostGetDevicePointer + CV_WRAP bool canMapHostMemory() const; + + enum ComputeMode + { + ComputeModeDefault, /**< default compute mode (Multiple threads can use cudaSetDevice with this device) */ + ComputeModeExclusive, /**< compute-exclusive-thread mode (Only one thread in one process will be able to use cudaSetDevice with this device) */ + ComputeModeProhibited, /**< compute-prohibited mode (No threads can use cudaSetDevice with this device) */ + ComputeModeExclusiveProcess /**< compute-exclusive-process mode (Many threads in one process will be able to use cudaSetDevice with this device) */ + }; + + //! compute mode + CV_WRAP DeviceInfo::ComputeMode computeMode() const; + + //! maximum 1D texture size + CV_WRAP int maxTexture1D() const; + + //! maximum 1D mipmapped texture size + CV_WRAP int maxTexture1DMipmap() const; + + //! maximum size for 1D textures bound to linear memory + CV_WRAP int maxTexture1DLinear() const; + + //! maximum 2D texture dimensions + CV_WRAP Vec2i maxTexture2D() const; + + //! maximum 2D mipmapped texture dimensions + CV_WRAP Vec2i maxTexture2DMipmap() const; + + //! maximum dimensions (width, height, pitch) for 2D textures bound to pitched memory + CV_WRAP Vec3i maxTexture2DLinear() const; + + //! maximum 2D texture dimensions if texture gather operations have to be performed + CV_WRAP Vec2i maxTexture2DGather() const; + + //! maximum 3D texture dimensions + CV_WRAP Vec3i maxTexture3D() const; + + //! maximum Cubemap texture dimensions + CV_WRAP int maxTextureCubemap() const; + + //! maximum 1D layered texture dimensions + CV_WRAP Vec2i maxTexture1DLayered() const; + + //! maximum 2D layered texture dimensions + CV_WRAP Vec3i maxTexture2DLayered() const; + + //! maximum Cubemap layered texture dimensions + CV_WRAP Vec2i maxTextureCubemapLayered() const; + + //! maximum 1D surface size + CV_WRAP int maxSurface1D() const; + + //! maximum 2D surface dimensions + CV_WRAP Vec2i maxSurface2D() const; + + //! maximum 3D surface dimensions + CV_WRAP Vec3i maxSurface3D() const; + + //! maximum 1D layered surface dimensions + CV_WRAP Vec2i maxSurface1DLayered() const; + + //! maximum 2D layered surface dimensions + CV_WRAP Vec3i maxSurface2DLayered() const; + + //! maximum Cubemap surface dimensions + CV_WRAP int maxSurfaceCubemap() const; + + //! maximum Cubemap layered surface dimensions + CV_WRAP Vec2i maxSurfaceCubemapLayered() const; + + //! alignment requirements for surfaces + CV_WRAP size_t surfaceAlignment() const; + + //! device can possibly execute multiple kernels concurrently + CV_WRAP bool concurrentKernels() const; + + //! device has ECC support enabled + CV_WRAP bool ECCEnabled() const; + + //! PCI bus ID of the device + CV_WRAP int pciBusID() const; + + //! PCI device ID of the device + CV_WRAP int pciDeviceID() const; + + //! PCI domain ID of the device + CV_WRAP int pciDomainID() const; + + //! true if device is a Tesla device using TCC driver, false otherwise + CV_WRAP bool tccDriver() const; + + //! number of asynchronous engines + CV_WRAP int asyncEngineCount() const; + + //! device shares a unified address space with the host + CV_WRAP bool unifiedAddressing() const; + + //! peak memory clock frequency in kilohertz + CV_WRAP int memoryClockRate() const; + + //! global memory bus width in bits + CV_WRAP int memoryBusWidth() const; + + //! size of L2 cache in bytes + CV_WRAP int l2CacheSize() const; + + //! maximum resident threads per multiprocessor + CV_WRAP int maxThreadsPerMultiProcessor() const; + + //! gets free and total device memory + CV_WRAP void queryMemory(size_t& totalMemory, size_t& freeMemory) const; + CV_WRAP size_t freeMemory() const; + CV_WRAP size_t totalMemory() const; + + /** @brief Provides information on CUDA feature support. + + @param feature_set Features to be checked. See cuda::FeatureSet. + + This function returns true if the device has the specified CUDA feature. Otherwise, it returns false + */ + bool supports(FeatureSet feature_set) const; + + /** @brief Checks the CUDA module and device compatibility. + + This function returns true if the CUDA module can be run on the specified device. Otherwise, it + returns false . + */ + CV_WRAP bool isCompatible() const; + +private: + int device_id_; +}; + +CV_EXPORTS_W void printCudaDeviceInfo(int device); +CV_EXPORTS_W void printShortCudaDeviceInfo(int device); + +/** @brief Converts an array to half precision floating number. + +@param _src input array. +@param _dst output array. +@param stream Stream for the asynchronous version. +@sa convertFp16 +*/ +CV_EXPORTS void convertFp16(InputArray _src, OutputArray _dst, Stream& stream = Stream::Null()); + +//! @} cudacore_init + +}} // namespace cv { namespace cuda { + + +#include "opencv2/core/cuda.inl.hpp" + +#endif /* OPENCV_CORE_CUDA_HPP */ diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/cuda.inl.hpp b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/cuda.inl.hpp new file mode 100644 index 0000000..30fc0ae --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/cuda.inl.hpp @@ -0,0 +1,637 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Copyright (C) 2013, OpenCV Foundation, all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef OPENCV_CORE_CUDAINL_HPP +#define OPENCV_CORE_CUDAINL_HPP + +#include "opencv2/core/cuda.hpp" + +//! @cond IGNORED + +namespace cv { namespace cuda { + +//=================================================================================== +// GpuMat +//=================================================================================== + +inline +GpuMat::GpuMat(Allocator* allocator_) + : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), allocator(allocator_) +{} + +inline +GpuMat::GpuMat(int rows_, int cols_, int type_, Allocator* allocator_) + : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), allocator(allocator_) +{ + if (rows_ > 0 && cols_ > 0) + create(rows_, cols_, type_); +} + +inline +GpuMat::GpuMat(Size size_, int type_, Allocator* allocator_) + : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), allocator(allocator_) +{ + if (size_.height > 0 && size_.width > 0) + create(size_.height, size_.width, type_); +} + +inline +GpuMat::GpuMat(int rows_, int cols_, int type_, Scalar s_, Allocator* allocator_) + : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), allocator(allocator_) +{ + if (rows_ > 0 && cols_ > 0) + { + create(rows_, cols_, type_); + setTo(s_); + } +} + +inline +GpuMat::GpuMat(Size size_, int type_, Scalar s_, Allocator* allocator_) + : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), allocator(allocator_) +{ + if (size_.height > 0 && size_.width > 0) + { + create(size_.height, size_.width, type_); + setTo(s_); + } +} + +inline +GpuMat::GpuMat(const GpuMat& m) + : flags(m.flags), rows(m.rows), cols(m.cols), step(m.step), data(m.data), refcount(m.refcount), datastart(m.datastart), dataend(m.dataend), allocator(m.allocator) +{ + if (refcount) + CV_XADD(refcount, 1); +} + +inline +GpuMat::GpuMat(InputArray arr, Allocator* allocator_) : + flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), allocator(allocator_) +{ + upload(arr); +} + +inline +GpuMat::~GpuMat() +{ + release(); +} + +inline +GpuMat& GpuMat::operator =(const GpuMat& m) +{ + if (this != &m) + { + GpuMat temp(m); + swap(temp); + } + + return *this; +} + +inline +void GpuMat::create(Size size_, int type_) +{ + create(size_.height, size_.width, type_); +} + +inline +void GpuMat::swap(GpuMat& b) +{ + std::swap(flags, b.flags); + std::swap(rows, b.rows); + std::swap(cols, b.cols); + std::swap(step, b.step); + std::swap(data, b.data); + std::swap(datastart, b.datastart); + std::swap(dataend, b.dataend); + std::swap(refcount, b.refcount); + std::swap(allocator, b.allocator); +} + +inline +GpuMat GpuMat::clone() const +{ + GpuMat m; + copyTo(m); + return m; +} + +inline +void GpuMat::copyTo(OutputArray dst, InputArray mask) const +{ + copyTo(dst, mask, Stream::Null()); +} + +inline +GpuMat& GpuMat::setTo(Scalar s) +{ + return setTo(s, Stream::Null()); +} + +inline +GpuMat& GpuMat::setTo(Scalar s, InputArray mask) +{ + return setTo(s, mask, Stream::Null()); +} + +inline +void GpuMat::convertTo(OutputArray dst, int rtype) const +{ + convertTo(dst, rtype, Stream::Null()); +} + +inline +void GpuMat::convertTo(OutputArray dst, int rtype, double alpha, double beta) const +{ + convertTo(dst, rtype, alpha, beta, Stream::Null()); +} + +inline +void GpuMat::convertTo(OutputArray dst, int rtype, double alpha, Stream& stream) const +{ + convertTo(dst, rtype, alpha, 0.0, stream); +} + +inline +void GpuMat::assignTo(GpuMat& m, int _type) const +{ + if (_type < 0) + m = *this; + else + convertTo(m, _type); +} + +inline +uchar* GpuMat::ptr(int y) +{ + CV_DbgAssert( (unsigned)y < (unsigned)rows ); + return data + step * y; +} + +inline +const uchar* GpuMat::ptr(int y) const +{ + CV_DbgAssert( (unsigned)y < (unsigned)rows ); + return data + step * y; +} + +template inline +_Tp* GpuMat::ptr(int y) +{ + return (_Tp*)ptr(y); +} + +template inline +const _Tp* GpuMat::ptr(int y) const +{ + return (const _Tp*)ptr(y); +} + +template inline +GpuMat::operator PtrStepSz() const +{ + return PtrStepSz(rows, cols, (T*)data, step); +} + +template inline +GpuMat::operator PtrStep() const +{ + return PtrStep((T*)data, step); +} + +inline +GpuMat GpuMat::row(int y) const +{ + return GpuMat(*this, Range(y, y+1), Range::all()); +} + +inline +GpuMat GpuMat::col(int x) const +{ + return GpuMat(*this, Range::all(), Range(x, x+1)); +} + +inline +GpuMat GpuMat::rowRange(int startrow, int endrow) const +{ + return GpuMat(*this, Range(startrow, endrow), Range::all()); +} + +inline +GpuMat GpuMat::rowRange(Range r) const +{ + return GpuMat(*this, r, Range::all()); +} + +inline +GpuMat GpuMat::colRange(int startcol, int endcol) const +{ + return GpuMat(*this, Range::all(), Range(startcol, endcol)); +} + +inline +GpuMat GpuMat::colRange(Range r) const +{ + return GpuMat(*this, Range::all(), r); +} + +inline +GpuMat GpuMat::operator ()(Range rowRange_, Range colRange_) const +{ + return GpuMat(*this, rowRange_, colRange_); +} + +inline +GpuMat GpuMat::operator ()(Rect roi) const +{ + return GpuMat(*this, roi); +} + +inline +bool GpuMat::isContinuous() const +{ + return (flags & Mat::CONTINUOUS_FLAG) != 0; +} + +inline +size_t GpuMat::elemSize() const +{ + return CV_ELEM_SIZE(flags); +} + +inline +size_t GpuMat::elemSize1() const +{ + return CV_ELEM_SIZE1(flags); +} + +inline +int GpuMat::type() const +{ + return CV_MAT_TYPE(flags); +} + +inline +int GpuMat::depth() const +{ + return CV_MAT_DEPTH(flags); +} + +inline +int GpuMat::channels() const +{ + return CV_MAT_CN(flags); +} + +inline +size_t GpuMat::step1() const +{ + return step / elemSize1(); +} + +inline +Size GpuMat::size() const +{ + return Size(cols, rows); +} + +inline +bool GpuMat::empty() const +{ + return data == 0; +} + +inline +void* GpuMat::cudaPtr() const +{ + return data; +} + +static inline +GpuMat createContinuous(int rows, int cols, int type) +{ + GpuMat m; + createContinuous(rows, cols, type, m); + return m; +} + +static inline +void createContinuous(Size size, int type, OutputArray arr) +{ + createContinuous(size.height, size.width, type, arr); +} + +static inline +GpuMat createContinuous(Size size, int type) +{ + GpuMat m; + createContinuous(size, type, m); + return m; +} + +static inline +void ensureSizeIsEnough(Size size, int type, OutputArray arr) +{ + ensureSizeIsEnough(size.height, size.width, type, arr); +} + +static inline +void swap(GpuMat& a, GpuMat& b) +{ + a.swap(b); +} + +//=================================================================================== +// HostMem +//=================================================================================== + +inline +HostMem::HostMem(AllocType alloc_type_) + : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), alloc_type(alloc_type_) +{ +} + +inline +HostMem::HostMem(const HostMem& m) + : flags(m.flags), rows(m.rows), cols(m.cols), step(m.step), data(m.data), refcount(m.refcount), datastart(m.datastart), dataend(m.dataend), alloc_type(m.alloc_type) +{ + if( refcount ) + CV_XADD(refcount, 1); +} + +inline +HostMem::HostMem(int rows_, int cols_, int type_, AllocType alloc_type_) + : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), alloc_type(alloc_type_) +{ + if (rows_ > 0 && cols_ > 0) + create(rows_, cols_, type_); +} + +inline +HostMem::HostMem(Size size_, int type_, AllocType alloc_type_) + : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), alloc_type(alloc_type_) +{ + if (size_.height > 0 && size_.width > 0) + create(size_.height, size_.width, type_); +} + +inline +HostMem::HostMem(InputArray arr, AllocType alloc_type_) + : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), alloc_type(alloc_type_) +{ + arr.getMat().copyTo(*this); +} + +inline +HostMem::~HostMem() +{ + release(); +} + +inline +HostMem& HostMem::operator =(const HostMem& m) +{ + if (this != &m) + { + HostMem temp(m); + swap(temp); + } + + return *this; +} + +inline +void HostMem::swap(HostMem& b) +{ + std::swap(flags, b.flags); + std::swap(rows, b.rows); + std::swap(cols, b.cols); + std::swap(step, b.step); + std::swap(data, b.data); + std::swap(datastart, b.datastart); + std::swap(dataend, b.dataend); + std::swap(refcount, b.refcount); + std::swap(alloc_type, b.alloc_type); +} + +inline +HostMem HostMem::clone() const +{ + HostMem m(size(), type(), alloc_type); + createMatHeader().copyTo(m); + return m; +} + +inline +void HostMem::create(Size size_, int type_) +{ + create(size_.height, size_.width, type_); +} + +inline +Mat HostMem::createMatHeader() const +{ + return Mat(size(), type(), data, step); +} + +inline +bool HostMem::isContinuous() const +{ + return (flags & Mat::CONTINUOUS_FLAG) != 0; +} + +inline +size_t HostMem::elemSize() const +{ + return CV_ELEM_SIZE(flags); +} + +inline +size_t HostMem::elemSize1() const +{ + return CV_ELEM_SIZE1(flags); +} + +inline +int HostMem::type() const +{ + return CV_MAT_TYPE(flags); +} + +inline +int HostMem::depth() const +{ + return CV_MAT_DEPTH(flags); +} + +inline +int HostMem::channels() const +{ + return CV_MAT_CN(flags); +} + +inline +size_t HostMem::step1() const +{ + return step / elemSize1(); +} + +inline +Size HostMem::size() const +{ + return Size(cols, rows); +} + +inline +bool HostMem::empty() const +{ + return data == 0; +} + +static inline +void swap(HostMem& a, HostMem& b) +{ + a.swap(b); +} + +//=================================================================================== +// Stream +//=================================================================================== + +inline +Stream::Stream(const Ptr& impl) + : impl_(impl) +{ +} + +//=================================================================================== +// Event +//=================================================================================== + +inline +Event::Event(const Ptr& impl) + : impl_(impl) +{ +} + +//=================================================================================== +// Initialization & Info +//=================================================================================== + +inline +bool TargetArchs::has(int major, int minor) +{ + return hasPtx(major, minor) || hasBin(major, minor); +} + +inline +bool TargetArchs::hasEqualOrGreater(int major, int minor) +{ + return hasEqualOrGreaterPtx(major, minor) || hasEqualOrGreaterBin(major, minor); +} + +inline +DeviceInfo::DeviceInfo() +{ + device_id_ = getDevice(); +} + +inline +DeviceInfo::DeviceInfo(int device_id) +{ + CV_Assert( device_id >= 0 && device_id < getCudaEnabledDeviceCount() ); + device_id_ = device_id; +} + +inline +int DeviceInfo::deviceID() const +{ + return device_id_; +} + +inline +size_t DeviceInfo::freeMemory() const +{ + size_t _totalMemory = 0, _freeMemory = 0; + queryMemory(_totalMemory, _freeMemory); + return _freeMemory; +} + +inline +size_t DeviceInfo::totalMemory() const +{ + size_t _totalMemory = 0, _freeMemory = 0; + queryMemory(_totalMemory, _freeMemory); + return _totalMemory; +} + +inline +bool DeviceInfo::supports(FeatureSet feature_set) const +{ + int version = majorVersion() * 10 + minorVersion(); + return version >= feature_set; +} + + +}} // namespace cv { namespace cuda { + +//=================================================================================== +// Mat +//=================================================================================== + +namespace cv { + +inline +Mat::Mat(const cuda::GpuMat& m) + : flags(0), dims(0), rows(0), cols(0), data(0), datastart(0), dataend(0), datalimit(0), allocator(0), u(0), size(&rows) +{ + m.download(*this); +} + +} + +//! @endcond + +#endif // OPENCV_CORE_CUDAINL_HPP diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/cuda/block.hpp b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/cuda/block.hpp new file mode 100644 index 0000000..c277f0e --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/cuda/block.hpp @@ -0,0 +1,211 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef OPENCV_CUDA_DEVICE_BLOCK_HPP +#define OPENCV_CUDA_DEVICE_BLOCK_HPP + +/** @file + * @deprecated Use @ref cudev instead. + */ + +//! @cond IGNORED + +namespace cv { namespace cuda { namespace device +{ + struct Block + { + static __device__ __forceinline__ unsigned int id() + { + return blockIdx.x; + } + + static __device__ __forceinline__ unsigned int stride() + { + return blockDim.x * blockDim.y * blockDim.z; + } + + static __device__ __forceinline__ void sync() + { + __syncthreads(); + } + + static __device__ __forceinline__ int flattenedThreadId() + { + return threadIdx.z * blockDim.x * blockDim.y + threadIdx.y * blockDim.x + threadIdx.x; + } + + template + static __device__ __forceinline__ void fill(It beg, It end, const T& value) + { + int STRIDE = stride(); + It t = beg + flattenedThreadId(); + + for(; t < end; t += STRIDE) + *t = value; + } + + template + static __device__ __forceinline__ void yota(OutIt beg, OutIt end, T value) + { + int STRIDE = stride(); + int tid = flattenedThreadId(); + value += tid; + + for(OutIt t = beg + tid; t < end; t += STRIDE, value += STRIDE) + *t = value; + } + + template + static __device__ __forceinline__ void copy(InIt beg, InIt end, OutIt out) + { + int STRIDE = stride(); + InIt t = beg + flattenedThreadId(); + OutIt o = out + (t - beg); + + for(; t < end; t += STRIDE, o += STRIDE) + *o = *t; + } + + template + static __device__ __forceinline__ void transform(InIt beg, InIt end, OutIt out, UnOp op) + { + int STRIDE = stride(); + InIt t = beg + flattenedThreadId(); + OutIt o = out + (t - beg); + + for(; t < end; t += STRIDE, o += STRIDE) + *o = op(*t); + } + + template + static __device__ __forceinline__ void transform(InIt1 beg1, InIt1 end1, InIt2 beg2, OutIt out, BinOp op) + { + int STRIDE = stride(); + InIt1 t1 = beg1 + flattenedThreadId(); + InIt2 t2 = beg2 + flattenedThreadId(); + OutIt o = out + (t1 - beg1); + + for(; t1 < end1; t1 += STRIDE, t2 += STRIDE, o += STRIDE) + *o = op(*t1, *t2); + } + + template + static __device__ __forceinline__ void reduce(volatile T* buffer, BinOp op) + { + int tid = flattenedThreadId(); + T val = buffer[tid]; + + if (CTA_SIZE >= 1024) { if (tid < 512) buffer[tid] = val = op(val, buffer[tid + 512]); __syncthreads(); } + if (CTA_SIZE >= 512) { if (tid < 256) buffer[tid] = val = op(val, buffer[tid + 256]); __syncthreads(); } + if (CTA_SIZE >= 256) { if (tid < 128) buffer[tid] = val = op(val, buffer[tid + 128]); __syncthreads(); } + if (CTA_SIZE >= 128) { if (tid < 64) buffer[tid] = val = op(val, buffer[tid + 64]); __syncthreads(); } + + if (tid < 32) + { + if (CTA_SIZE >= 64) { buffer[tid] = val = op(val, buffer[tid + 32]); } + if (CTA_SIZE >= 32) { buffer[tid] = val = op(val, buffer[tid + 16]); } + if (CTA_SIZE >= 16) { buffer[tid] = val = op(val, buffer[tid + 8]); } + if (CTA_SIZE >= 8) { buffer[tid] = val = op(val, buffer[tid + 4]); } + if (CTA_SIZE >= 4) { buffer[tid] = val = op(val, buffer[tid + 2]); } + if (CTA_SIZE >= 2) { buffer[tid] = val = op(val, buffer[tid + 1]); } + } + } + + template + static __device__ __forceinline__ T reduce(volatile T* buffer, T init, BinOp op) + { + int tid = flattenedThreadId(); + T val = buffer[tid] = init; + __syncthreads(); + + if (CTA_SIZE >= 1024) { if (tid < 512) buffer[tid] = val = op(val, buffer[tid + 512]); __syncthreads(); } + if (CTA_SIZE >= 512) { if (tid < 256) buffer[tid] = val = op(val, buffer[tid + 256]); __syncthreads(); } + if (CTA_SIZE >= 256) { if (tid < 128) buffer[tid] = val = op(val, buffer[tid + 128]); __syncthreads(); } + if (CTA_SIZE >= 128) { if (tid < 64) buffer[tid] = val = op(val, buffer[tid + 64]); __syncthreads(); } + + if (tid < 32) + { + if (CTA_SIZE >= 64) { buffer[tid] = val = op(val, buffer[tid + 32]); } + if (CTA_SIZE >= 32) { buffer[tid] = val = op(val, buffer[tid + 16]); } + if (CTA_SIZE >= 16) { buffer[tid] = val = op(val, buffer[tid + 8]); } + if (CTA_SIZE >= 8) { buffer[tid] = val = op(val, buffer[tid + 4]); } + if (CTA_SIZE >= 4) { buffer[tid] = val = op(val, buffer[tid + 2]); } + if (CTA_SIZE >= 2) { buffer[tid] = val = op(val, buffer[tid + 1]); } + } + __syncthreads(); + return buffer[0]; + } + + template + static __device__ __forceinline__ void reduce_n(T* data, unsigned int n, BinOp op) + { + int ftid = flattenedThreadId(); + int sft = stride(); + + if (sft < n) + { + for (unsigned int i = sft + ftid; i < n; i += sft) + data[ftid] = op(data[ftid], data[i]); + + __syncthreads(); + + n = sft; + } + + while (n > 1) + { + unsigned int half = n/2; + + if (ftid < half) + data[ftid] = op(data[ftid], data[n - ftid - 1]); + + __syncthreads(); + + n = n - half; + } + } + }; +}}} + +//! @endcond + +#endif /* OPENCV_CUDA_DEVICE_BLOCK_HPP */ diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/cuda/border_interpolate.hpp b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/cuda/border_interpolate.hpp new file mode 100644 index 0000000..874f705 --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/cuda/border_interpolate.hpp @@ -0,0 +1,722 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef OPENCV_CUDA_BORDER_INTERPOLATE_HPP +#define OPENCV_CUDA_BORDER_INTERPOLATE_HPP + +#include "saturate_cast.hpp" +#include "vec_traits.hpp" +#include "vec_math.hpp" + +/** @file + * @deprecated Use @ref cudev instead. + */ + +//! @cond IGNORED + +namespace cv { namespace cuda { namespace device +{ + ////////////////////////////////////////////////////////////// + // BrdConstant + + template struct BrdRowConstant + { + typedef D result_type; + + explicit __host__ __device__ __forceinline__ BrdRowConstant(int width_, const D& val_ = VecTraits::all(0)) : width(width_), val(val_) {} + + template __device__ __forceinline__ D at_low(int x, const T* data) const + { + return x >= 0 ? saturate_cast(data[x]) : val; + } + + template __device__ __forceinline__ D at_high(int x, const T* data) const + { + return x < width ? saturate_cast(data[x]) : val; + } + + template __device__ __forceinline__ D at(int x, const T* data) const + { + return (x >= 0 && x < width) ? saturate_cast(data[x]) : val; + } + + int width; + D val; + }; + + template struct BrdColConstant + { + typedef D result_type; + + explicit __host__ __device__ __forceinline__ BrdColConstant(int height_, const D& val_ = VecTraits::all(0)) : height(height_), val(val_) {} + + template __device__ __forceinline__ D at_low(int y, const T* data, size_t step) const + { + return y >= 0 ? saturate_cast(*(const T*)((const char*)data + y * step)) : val; + } + + template __device__ __forceinline__ D at_high(int y, const T* data, size_t step) const + { + return y < height ? saturate_cast(*(const T*)((const char*)data + y * step)) : val; + } + + template __device__ __forceinline__ D at(int y, const T* data, size_t step) const + { + return (y >= 0 && y < height) ? saturate_cast(*(const T*)((const char*)data + y * step)) : val; + } + + int height; + D val; + }; + + template struct BrdConstant + { + typedef D result_type; + + __host__ __device__ __forceinline__ BrdConstant(int height_, int width_, const D& val_ = VecTraits::all(0)) : height(height_), width(width_), val(val_) + { + } + + template __device__ __forceinline__ D at(int y, int x, const T* data, size_t step) const + { + return (x >= 0 && x < width && y >= 0 && y < height) ? saturate_cast(((const T*)((const uchar*)data + y * step))[x]) : val; + } + + template __device__ __forceinline__ D at(typename Ptr2D::index_type y, typename Ptr2D::index_type x, const Ptr2D& src) const + { + return (x >= 0 && x < width && y >= 0 && y < height) ? saturate_cast(src(y, x)) : val; + } + + int height; + int width; + D val; + }; + + ////////////////////////////////////////////////////////////// + // BrdReplicate + + template struct BrdRowReplicate + { + typedef D result_type; + + explicit __host__ __device__ __forceinline__ BrdRowReplicate(int width) : last_col(width - 1) {} + template __host__ __device__ __forceinline__ BrdRowReplicate(int width, U) : last_col(width - 1) {} + + __device__ __forceinline__ int idx_col_low(int x) const + { + return ::max(x, 0); + } + + __device__ __forceinline__ int idx_col_high(int x) const + { + return ::min(x, last_col); + } + + __device__ __forceinline__ int idx_col(int x) const + { + return idx_col_low(idx_col_high(x)); + } + + template __device__ __forceinline__ D at_low(int x, const T* data) const + { + return saturate_cast(data[idx_col_low(x)]); + } + + template __device__ __forceinline__ D at_high(int x, const T* data) const + { + return saturate_cast(data[idx_col_high(x)]); + } + + template __device__ __forceinline__ D at(int x, const T* data) const + { + return saturate_cast(data[idx_col(x)]); + } + + int last_col; + }; + + template struct BrdColReplicate + { + typedef D result_type; + + explicit __host__ __device__ __forceinline__ BrdColReplicate(int height) : last_row(height - 1) {} + template __host__ __device__ __forceinline__ BrdColReplicate(int height, U) : last_row(height - 1) {} + + __device__ __forceinline__ int idx_row_low(int y) const + { + return ::max(y, 0); + } + + __device__ __forceinline__ int idx_row_high(int y) const + { + return ::min(y, last_row); + } + + __device__ __forceinline__ int idx_row(int y) const + { + return idx_row_low(idx_row_high(y)); + } + + template __device__ __forceinline__ D at_low(int y, const T* data, size_t step) const + { + return saturate_cast(*(const T*)((const char*)data + idx_row_low(y) * step)); + } + + template __device__ __forceinline__ D at_high(int y, const T* data, size_t step) const + { + return saturate_cast(*(const T*)((const char*)data + idx_row_high(y) * step)); + } + + template __device__ __forceinline__ D at(int y, const T* data, size_t step) const + { + return saturate_cast(*(const T*)((const char*)data + idx_row(y) * step)); + } + + int last_row; + }; + + template struct BrdReplicate + { + typedef D result_type; + + __host__ __device__ __forceinline__ BrdReplicate(int height, int width) : last_row(height - 1), last_col(width - 1) {} + template __host__ __device__ __forceinline__ BrdReplicate(int height, int width, U) : last_row(height - 1), last_col(width - 1) {} + + __device__ __forceinline__ int idx_row_low(int y) const + { + return ::max(y, 0); + } + + __device__ __forceinline__ int idx_row_high(int y) const + { + return ::min(y, last_row); + } + + __device__ __forceinline__ int idx_row(int y) const + { + return idx_row_low(idx_row_high(y)); + } + + __device__ __forceinline__ int idx_col_low(int x) const + { + return ::max(x, 0); + } + + __device__ __forceinline__ int idx_col_high(int x) const + { + return ::min(x, last_col); + } + + __device__ __forceinline__ int idx_col(int x) const + { + return idx_col_low(idx_col_high(x)); + } + + template __device__ __forceinline__ D at(int y, int x, const T* data, size_t step) const + { + return saturate_cast(((const T*)((const char*)data + idx_row(y) * step))[idx_col(x)]); + } + + template __device__ __forceinline__ D at(typename Ptr2D::index_type y, typename Ptr2D::index_type x, const Ptr2D& src) const + { + return saturate_cast(src(idx_row(y), idx_col(x))); + } + + int last_row; + int last_col; + }; + + ////////////////////////////////////////////////////////////// + // BrdReflect101 + + template struct BrdRowReflect101 + { + typedef D result_type; + + explicit __host__ __device__ __forceinline__ BrdRowReflect101(int width) : last_col(width - 1) {} + template __host__ __device__ __forceinline__ BrdRowReflect101(int width, U) : last_col(width - 1) {} + + __device__ __forceinline__ int idx_col_low(int x) const + { + return ::abs(x) % (last_col + 1); + } + + __device__ __forceinline__ int idx_col_high(int x) const + { + return ::abs(last_col - ::abs(last_col - x)) % (last_col + 1); + } + + __device__ __forceinline__ int idx_col(int x) const + { + return idx_col_low(idx_col_high(x)); + } + + template __device__ __forceinline__ D at_low(int x, const T* data) const + { + return saturate_cast(data[idx_col_low(x)]); + } + + template __device__ __forceinline__ D at_high(int x, const T* data) const + { + return saturate_cast(data[idx_col_high(x)]); + } + + template __device__ __forceinline__ D at(int x, const T* data) const + { + return saturate_cast(data[idx_col(x)]); + } + + int last_col; + }; + + template struct BrdColReflect101 + { + typedef D result_type; + + explicit __host__ __device__ __forceinline__ BrdColReflect101(int height) : last_row(height - 1) {} + template __host__ __device__ __forceinline__ BrdColReflect101(int height, U) : last_row(height - 1) {} + + __device__ __forceinline__ int idx_row_low(int y) const + { + return ::abs(y) % (last_row + 1); + } + + __device__ __forceinline__ int idx_row_high(int y) const + { + return ::abs(last_row - ::abs(last_row - y)) % (last_row + 1); + } + + __device__ __forceinline__ int idx_row(int y) const + { + return idx_row_low(idx_row_high(y)); + } + + template __device__ __forceinline__ D at_low(int y, const T* data, size_t step) const + { + return saturate_cast(*(const D*)((const char*)data + idx_row_low(y) * step)); + } + + template __device__ __forceinline__ D at_high(int y, const T* data, size_t step) const + { + return saturate_cast(*(const D*)((const char*)data + idx_row_high(y) * step)); + } + + template __device__ __forceinline__ D at(int y, const T* data, size_t step) const + { + return saturate_cast(*(const D*)((const char*)data + idx_row(y) * step)); + } + + int last_row; + }; + + template struct BrdReflect101 + { + typedef D result_type; + + __host__ __device__ __forceinline__ BrdReflect101(int height, int width) : last_row(height - 1), last_col(width - 1) {} + template __host__ __device__ __forceinline__ BrdReflect101(int height, int width, U) : last_row(height - 1), last_col(width - 1) {} + + __device__ __forceinline__ int idx_row_low(int y) const + { + return ::abs(y) % (last_row + 1); + } + + __device__ __forceinline__ int idx_row_high(int y) const + { + return ::abs(last_row - ::abs(last_row - y)) % (last_row + 1); + } + + __device__ __forceinline__ int idx_row(int y) const + { + return idx_row_low(idx_row_high(y)); + } + + __device__ __forceinline__ int idx_col_low(int x) const + { + return ::abs(x) % (last_col + 1); + } + + __device__ __forceinline__ int idx_col_high(int x) const + { + return ::abs(last_col - ::abs(last_col - x)) % (last_col + 1); + } + + __device__ __forceinline__ int idx_col(int x) const + { + return idx_col_low(idx_col_high(x)); + } + + template __device__ __forceinline__ D at(int y, int x, const T* data, size_t step) const + { + return saturate_cast(((const T*)((const char*)data + idx_row(y) * step))[idx_col(x)]); + } + + template __device__ __forceinline__ D at(typename Ptr2D::index_type y, typename Ptr2D::index_type x, const Ptr2D& src) const + { + return saturate_cast(src(idx_row(y), idx_col(x))); + } + + int last_row; + int last_col; + }; + + ////////////////////////////////////////////////////////////// + // BrdReflect + + template struct BrdRowReflect + { + typedef D result_type; + + explicit __host__ __device__ __forceinline__ BrdRowReflect(int width) : last_col(width - 1) {} + template __host__ __device__ __forceinline__ BrdRowReflect(int width, U) : last_col(width - 1) {} + + __device__ __forceinline__ int idx_col_low(int x) const + { + return (::abs(x) - (x < 0)) % (last_col + 1); + } + + __device__ __forceinline__ int idx_col_high(int x) const + { + return ::abs(last_col - ::abs(last_col - x) + (x > last_col)) % (last_col + 1); + } + + __device__ __forceinline__ int idx_col(int x) const + { + return idx_col_high(::abs(x) - (x < 0)); + } + + template __device__ __forceinline__ D at_low(int x, const T* data) const + { + return saturate_cast(data[idx_col_low(x)]); + } + + template __device__ __forceinline__ D at_high(int x, const T* data) const + { + return saturate_cast(data[idx_col_high(x)]); + } + + template __device__ __forceinline__ D at(int x, const T* data) const + { + return saturate_cast(data[idx_col(x)]); + } + + int last_col; + }; + + template struct BrdColReflect + { + typedef D result_type; + + explicit __host__ __device__ __forceinline__ BrdColReflect(int height) : last_row(height - 1) {} + template __host__ __device__ __forceinline__ BrdColReflect(int height, U) : last_row(height - 1) {} + + __device__ __forceinline__ int idx_row_low(int y) const + { + return (::abs(y) - (y < 0)) % (last_row + 1); + } + + __device__ __forceinline__ int idx_row_high(int y) const + { + return ::abs(last_row - ::abs(last_row - y) + (y > last_row)) % (last_row + 1); + } + + __device__ __forceinline__ int idx_row(int y) const + { + return idx_row_high(::abs(y) - (y < 0)); + } + + template __device__ __forceinline__ D at_low(int y, const T* data, size_t step) const + { + return saturate_cast(*(const D*)((const char*)data + idx_row_low(y) * step)); + } + + template __device__ __forceinline__ D at_high(int y, const T* data, size_t step) const + { + return saturate_cast(*(const D*)((const char*)data + idx_row_high(y) * step)); + } + + template __device__ __forceinline__ D at(int y, const T* data, size_t step) const + { + return saturate_cast(*(const D*)((const char*)data + idx_row(y) * step)); + } + + int last_row; + }; + + template struct BrdReflect + { + typedef D result_type; + + __host__ __device__ __forceinline__ BrdReflect(int height, int width) : last_row(height - 1), last_col(width - 1) {} + template __host__ __device__ __forceinline__ BrdReflect(int height, int width, U) : last_row(height - 1), last_col(width - 1) {} + + __device__ __forceinline__ int idx_row_low(int y) const + { + return (::abs(y) - (y < 0)) % (last_row + 1); + } + + __device__ __forceinline__ int idx_row_high(int y) const + { + return /*::abs*/(last_row - ::abs(last_row - y) + (y > last_row)) /*% (last_row + 1)*/; + } + + __device__ __forceinline__ int idx_row(int y) const + { + return idx_row_low(idx_row_high(y)); + } + + __device__ __forceinline__ int idx_col_low(int x) const + { + return (::abs(x) - (x < 0)) % (last_col + 1); + } + + __device__ __forceinline__ int idx_col_high(int x) const + { + return (last_col - ::abs(last_col - x) + (x > last_col)); + } + + __device__ __forceinline__ int idx_col(int x) const + { + return idx_col_low(idx_col_high(x)); + } + + template __device__ __forceinline__ D at(int y, int x, const T* data, size_t step) const + { + return saturate_cast(((const T*)((const char*)data + idx_row(y) * step))[idx_col(x)]); + } + + template __device__ __forceinline__ D at(typename Ptr2D::index_type y, typename Ptr2D::index_type x, const Ptr2D& src) const + { + return saturate_cast(src(idx_row(y), idx_col(x))); + } + + int last_row; + int last_col; + }; + + ////////////////////////////////////////////////////////////// + // BrdWrap + + template struct BrdRowWrap + { + typedef D result_type; + + explicit __host__ __device__ __forceinline__ BrdRowWrap(int width_) : width(width_) {} + template __host__ __device__ __forceinline__ BrdRowWrap(int width_, U) : width(width_) {} + + __device__ __forceinline__ int idx_col_low(int x) const + { + return (x >= 0) * x + (x < 0) * (x - ((x - width + 1) / width) * width); + } + + __device__ __forceinline__ int idx_col_high(int x) const + { + return (x < width) * x + (x >= width) * (x % width); + } + + __device__ __forceinline__ int idx_col(int x) const + { + return idx_col_high(idx_col_low(x)); + } + + template __device__ __forceinline__ D at_low(int x, const T* data) const + { + return saturate_cast(data[idx_col_low(x)]); + } + + template __device__ __forceinline__ D at_high(int x, const T* data) const + { + return saturate_cast(data[idx_col_high(x)]); + } + + template __device__ __forceinline__ D at(int x, const T* data) const + { + return saturate_cast(data[idx_col(x)]); + } + + int width; + }; + + template struct BrdColWrap + { + typedef D result_type; + + explicit __host__ __device__ __forceinline__ BrdColWrap(int height_) : height(height_) {} + template __host__ __device__ __forceinline__ BrdColWrap(int height_, U) : height(height_) {} + + __device__ __forceinline__ int idx_row_low(int y) const + { + return (y >= 0) * y + (y < 0) * (y - ((y - height + 1) / height) * height); + } + + __device__ __forceinline__ int idx_row_high(int y) const + { + return (y < height) * y + (y >= height) * (y % height); + } + + __device__ __forceinline__ int idx_row(int y) const + { + return idx_row_high(idx_row_low(y)); + } + + template __device__ __forceinline__ D at_low(int y, const T* data, size_t step) const + { + return saturate_cast(*(const D*)((const char*)data + idx_row_low(y) * step)); + } + + template __device__ __forceinline__ D at_high(int y, const T* data, size_t step) const + { + return saturate_cast(*(const D*)((const char*)data + idx_row_high(y) * step)); + } + + template __device__ __forceinline__ D at(int y, const T* data, size_t step) const + { + return saturate_cast(*(const D*)((const char*)data + idx_row(y) * step)); + } + + int height; + }; + + template struct BrdWrap + { + typedef D result_type; + + __host__ __device__ __forceinline__ BrdWrap(int height_, int width_) : + height(height_), width(width_) + { + } + template + __host__ __device__ __forceinline__ BrdWrap(int height_, int width_, U) : + height(height_), width(width_) + { + } + + __device__ __forceinline__ int idx_row_low(int y) const + { + return (y >= 0) ? y : (y - ((y - height + 1) / height) * height); + } + + __device__ __forceinline__ int idx_row_high(int y) const + { + return (y < height) ? y : (y % height); + } + + __device__ __forceinline__ int idx_row(int y) const + { + return idx_row_high(idx_row_low(y)); + } + + __device__ __forceinline__ int idx_col_low(int x) const + { + return (x >= 0) ? x : (x - ((x - width + 1) / width) * width); + } + + __device__ __forceinline__ int idx_col_high(int x) const + { + return (x < width) ? x : (x % width); + } + + __device__ __forceinline__ int idx_col(int x) const + { + return idx_col_high(idx_col_low(x)); + } + + template __device__ __forceinline__ D at(int y, int x, const T* data, size_t step) const + { + return saturate_cast(((const T*)((const char*)data + idx_row(y) * step))[idx_col(x)]); + } + + template __device__ __forceinline__ D at(typename Ptr2D::index_type y, typename Ptr2D::index_type x, const Ptr2D& src) const + { + return saturate_cast(src(idx_row(y), idx_col(x))); + } + + int height; + int width; + }; + + ////////////////////////////////////////////////////////////// + // BorderReader + + template struct BorderReader + { + typedef typename B::result_type elem_type; + typedef typename Ptr2D::index_type index_type; + + __host__ __device__ __forceinline__ BorderReader(const Ptr2D& ptr_, const B& b_) : ptr(ptr_), b(b_) {} + + __device__ __forceinline__ elem_type operator ()(index_type y, index_type x) const + { + return b.at(y, x, ptr); + } + + Ptr2D ptr; + B b; + }; + + // under win32 there is some bug with templated types that passed as kernel parameters + // with this specialization all works fine + template struct BorderReader< Ptr2D, BrdConstant > + { + typedef typename BrdConstant::result_type elem_type; + typedef typename Ptr2D::index_type index_type; + + __host__ __device__ __forceinline__ BorderReader(const Ptr2D& src_, const BrdConstant& b) : + src(src_), height(b.height), width(b.width), val(b.val) + { + } + + __device__ __forceinline__ D operator ()(index_type y, index_type x) const + { + return (x >= 0 && x < width && y >= 0 && y < height) ? saturate_cast(src(y, x)) : val; + } + + Ptr2D src; + int height; + int width; + D val; + }; +}}} // namespace cv { namespace cuda { namespace cudev + +//! @endcond + +#endif // OPENCV_CUDA_BORDER_INTERPOLATE_HPP diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/cuda/color.hpp b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/cuda/color.hpp new file mode 100644 index 0000000..dcce280 --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/cuda/color.hpp @@ -0,0 +1,309 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef OPENCV_CUDA_COLOR_HPP +#define OPENCV_CUDA_COLOR_HPP + +#include "detail/color_detail.hpp" + +/** @file + * @deprecated Use @ref cudev instead. + */ + +//! @cond IGNORED + +namespace cv { namespace cuda { namespace device +{ + // All OPENCV_CUDA_IMPLEMENT_*_TRAITS(ColorSpace1_to_ColorSpace2, ...) macros implements + // template class ColorSpace1_to_ColorSpace2_traits + // { + // typedef ... functor_type; + // static __host__ __device__ functor_type create_functor(); + // }; + + OPENCV_CUDA_IMPLEMENT_RGB2RGB_TRAITS(bgr_to_rgb, 3, 3, 2) + OPENCV_CUDA_IMPLEMENT_RGB2RGB_TRAITS(bgr_to_bgra, 3, 4, 0) + OPENCV_CUDA_IMPLEMENT_RGB2RGB_TRAITS(bgr_to_rgba, 3, 4, 2) + OPENCV_CUDA_IMPLEMENT_RGB2RGB_TRAITS(bgra_to_bgr, 4, 3, 0) + OPENCV_CUDA_IMPLEMENT_RGB2RGB_TRAITS(bgra_to_rgb, 4, 3, 2) + OPENCV_CUDA_IMPLEMENT_RGB2RGB_TRAITS(bgra_to_rgba, 4, 4, 2) + + #undef OPENCV_CUDA_IMPLEMENT_RGB2RGB_TRAITS + + OPENCV_CUDA_IMPLEMENT_RGB2RGB5x5_TRAITS(bgr_to_bgr555, 3, 0, 5) + OPENCV_CUDA_IMPLEMENT_RGB2RGB5x5_TRAITS(bgr_to_bgr565, 3, 0, 6) + OPENCV_CUDA_IMPLEMENT_RGB2RGB5x5_TRAITS(rgb_to_bgr555, 3, 2, 5) + OPENCV_CUDA_IMPLEMENT_RGB2RGB5x5_TRAITS(rgb_to_bgr565, 3, 2, 6) + OPENCV_CUDA_IMPLEMENT_RGB2RGB5x5_TRAITS(bgra_to_bgr555, 4, 0, 5) + OPENCV_CUDA_IMPLEMENT_RGB2RGB5x5_TRAITS(bgra_to_bgr565, 4, 0, 6) + OPENCV_CUDA_IMPLEMENT_RGB2RGB5x5_TRAITS(rgba_to_bgr555, 4, 2, 5) + OPENCV_CUDA_IMPLEMENT_RGB2RGB5x5_TRAITS(rgba_to_bgr565, 4, 2, 6) + + #undef OPENCV_CUDA_IMPLEMENT_RGB2RGB5x5_TRAITS + + OPENCV_CUDA_IMPLEMENT_RGB5x52RGB_TRAITS(bgr555_to_rgb, 3, 2, 5) + OPENCV_CUDA_IMPLEMENT_RGB5x52RGB_TRAITS(bgr565_to_rgb, 3, 2, 6) + OPENCV_CUDA_IMPLEMENT_RGB5x52RGB_TRAITS(bgr555_to_bgr, 3, 0, 5) + OPENCV_CUDA_IMPLEMENT_RGB5x52RGB_TRAITS(bgr565_to_bgr, 3, 0, 6) + OPENCV_CUDA_IMPLEMENT_RGB5x52RGB_TRAITS(bgr555_to_rgba, 4, 2, 5) + OPENCV_CUDA_IMPLEMENT_RGB5x52RGB_TRAITS(bgr565_to_rgba, 4, 2, 6) + OPENCV_CUDA_IMPLEMENT_RGB5x52RGB_TRAITS(bgr555_to_bgra, 4, 0, 5) + OPENCV_CUDA_IMPLEMENT_RGB5x52RGB_TRAITS(bgr565_to_bgra, 4, 0, 6) + + #undef OPENCV_CUDA_IMPLEMENT_RGB5x52RGB_TRAITS + + OPENCV_CUDA_IMPLEMENT_GRAY2RGB_TRAITS(gray_to_bgr, 3) + OPENCV_CUDA_IMPLEMENT_GRAY2RGB_TRAITS(gray_to_bgra, 4) + + #undef OPENCV_CUDA_IMPLEMENT_GRAY2RGB_TRAITS + + OPENCV_CUDA_IMPLEMENT_GRAY2RGB5x5_TRAITS(gray_to_bgr555, 5) + OPENCV_CUDA_IMPLEMENT_GRAY2RGB5x5_TRAITS(gray_to_bgr565, 6) + + #undef OPENCV_CUDA_IMPLEMENT_GRAY2RGB5x5_TRAITS + + OPENCV_CUDA_IMPLEMENT_RGB5x52GRAY_TRAITS(bgr555_to_gray, 5) + OPENCV_CUDA_IMPLEMENT_RGB5x52GRAY_TRAITS(bgr565_to_gray, 6) + + #undef OPENCV_CUDA_IMPLEMENT_RGB5x52GRAY_TRAITS + + OPENCV_CUDA_IMPLEMENT_RGB2GRAY_TRAITS(rgb_to_gray, 3, 2) + OPENCV_CUDA_IMPLEMENT_RGB2GRAY_TRAITS(bgr_to_gray, 3, 0) + OPENCV_CUDA_IMPLEMENT_RGB2GRAY_TRAITS(rgba_to_gray, 4, 2) + OPENCV_CUDA_IMPLEMENT_RGB2GRAY_TRAITS(bgra_to_gray, 4, 0) + + #undef OPENCV_CUDA_IMPLEMENT_RGB2GRAY_TRAITS + + OPENCV_CUDA_IMPLEMENT_RGB2YUV_TRAITS(rgb_to_yuv, 3, 3, 2) + OPENCV_CUDA_IMPLEMENT_RGB2YUV_TRAITS(rgba_to_yuv, 4, 3, 2) + OPENCV_CUDA_IMPLEMENT_RGB2YUV_TRAITS(rgb_to_yuv4, 3, 4, 2) + OPENCV_CUDA_IMPLEMENT_RGB2YUV_TRAITS(rgba_to_yuv4, 4, 4, 2) + OPENCV_CUDA_IMPLEMENT_RGB2YUV_TRAITS(bgr_to_yuv, 3, 3, 0) + OPENCV_CUDA_IMPLEMENT_RGB2YUV_TRAITS(bgra_to_yuv, 4, 3, 0) + OPENCV_CUDA_IMPLEMENT_RGB2YUV_TRAITS(bgr_to_yuv4, 3, 4, 0) + OPENCV_CUDA_IMPLEMENT_RGB2YUV_TRAITS(bgra_to_yuv4, 4, 4, 0) + + #undef OPENCV_CUDA_IMPLEMENT_RGB2YUV_TRAITS + + OPENCV_CUDA_IMPLEMENT_YUV2RGB_TRAITS(yuv_to_rgb, 3, 3, 2) + OPENCV_CUDA_IMPLEMENT_YUV2RGB_TRAITS(yuv_to_rgba, 3, 4, 2) + OPENCV_CUDA_IMPLEMENT_YUV2RGB_TRAITS(yuv4_to_rgb, 4, 3, 2) + OPENCV_CUDA_IMPLEMENT_YUV2RGB_TRAITS(yuv4_to_rgba, 4, 4, 2) + OPENCV_CUDA_IMPLEMENT_YUV2RGB_TRAITS(yuv_to_bgr, 3, 3, 0) + OPENCV_CUDA_IMPLEMENT_YUV2RGB_TRAITS(yuv_to_bgra, 3, 4, 0) + OPENCV_CUDA_IMPLEMENT_YUV2RGB_TRAITS(yuv4_to_bgr, 4, 3, 0) + OPENCV_CUDA_IMPLEMENT_YUV2RGB_TRAITS(yuv4_to_bgra, 4, 4, 0) + + #undef OPENCV_CUDA_IMPLEMENT_YUV2RGB_TRAITS + + OPENCV_CUDA_IMPLEMENT_RGB2YCrCb_TRAITS(rgb_to_YCrCb, 3, 3, 2) + OPENCV_CUDA_IMPLEMENT_RGB2YCrCb_TRAITS(rgba_to_YCrCb, 4, 3, 2) + OPENCV_CUDA_IMPLEMENT_RGB2YCrCb_TRAITS(rgb_to_YCrCb4, 3, 4, 2) + OPENCV_CUDA_IMPLEMENT_RGB2YCrCb_TRAITS(rgba_to_YCrCb4, 4, 4, 2) + OPENCV_CUDA_IMPLEMENT_RGB2YCrCb_TRAITS(bgr_to_YCrCb, 3, 3, 0) + OPENCV_CUDA_IMPLEMENT_RGB2YCrCb_TRAITS(bgra_to_YCrCb, 4, 3, 0) + OPENCV_CUDA_IMPLEMENT_RGB2YCrCb_TRAITS(bgr_to_YCrCb4, 3, 4, 0) + OPENCV_CUDA_IMPLEMENT_RGB2YCrCb_TRAITS(bgra_to_YCrCb4, 4, 4, 0) + + #undef OPENCV_CUDA_IMPLEMENT_RGB2YCrCb_TRAITS + + OPENCV_CUDA_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb_to_rgb, 3, 3, 2) + OPENCV_CUDA_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb_to_rgba, 3, 4, 2) + OPENCV_CUDA_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb4_to_rgb, 4, 3, 2) + OPENCV_CUDA_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb4_to_rgba, 4, 4, 2) + OPENCV_CUDA_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb_to_bgr, 3, 3, 0) + OPENCV_CUDA_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb_to_bgra, 3, 4, 0) + OPENCV_CUDA_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb4_to_bgr, 4, 3, 0) + OPENCV_CUDA_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb4_to_bgra, 4, 4, 0) + + #undef OPENCV_CUDA_IMPLEMENT_YCrCb2RGB_TRAITS + + OPENCV_CUDA_IMPLEMENT_RGB2XYZ_TRAITS(rgb_to_xyz, 3, 3, 2) + OPENCV_CUDA_IMPLEMENT_RGB2XYZ_TRAITS(rgba_to_xyz, 4, 3, 2) + OPENCV_CUDA_IMPLEMENT_RGB2XYZ_TRAITS(rgb_to_xyz4, 3, 4, 2) + OPENCV_CUDA_IMPLEMENT_RGB2XYZ_TRAITS(rgba_to_xyz4, 4, 4, 2) + OPENCV_CUDA_IMPLEMENT_RGB2XYZ_TRAITS(bgr_to_xyz, 3, 3, 0) + OPENCV_CUDA_IMPLEMENT_RGB2XYZ_TRAITS(bgra_to_xyz, 4, 3, 0) + OPENCV_CUDA_IMPLEMENT_RGB2XYZ_TRAITS(bgr_to_xyz4, 3, 4, 0) + OPENCV_CUDA_IMPLEMENT_RGB2XYZ_TRAITS(bgra_to_xyz4, 4, 4, 0) + + #undef OPENCV_CUDA_IMPLEMENT_RGB2XYZ_TRAITS + + OPENCV_CUDA_IMPLEMENT_XYZ2RGB_TRAITS(xyz_to_rgb, 3, 3, 2) + OPENCV_CUDA_IMPLEMENT_XYZ2RGB_TRAITS(xyz4_to_rgb, 4, 3, 2) + OPENCV_CUDA_IMPLEMENT_XYZ2RGB_TRAITS(xyz_to_rgba, 3, 4, 2) + OPENCV_CUDA_IMPLEMENT_XYZ2RGB_TRAITS(xyz4_to_rgba, 4, 4, 2) + OPENCV_CUDA_IMPLEMENT_XYZ2RGB_TRAITS(xyz_to_bgr, 3, 3, 0) + OPENCV_CUDA_IMPLEMENT_XYZ2RGB_TRAITS(xyz4_to_bgr, 4, 3, 0) + OPENCV_CUDA_IMPLEMENT_XYZ2RGB_TRAITS(xyz_to_bgra, 3, 4, 0) + OPENCV_CUDA_IMPLEMENT_XYZ2RGB_TRAITS(xyz4_to_bgra, 4, 4, 0) + + #undef OPENCV_CUDA_IMPLEMENT_XYZ2RGB_TRAITS + + OPENCV_CUDA_IMPLEMENT_RGB2HSV_TRAITS(rgb_to_hsv, 3, 3, 2) + OPENCV_CUDA_IMPLEMENT_RGB2HSV_TRAITS(rgba_to_hsv, 4, 3, 2) + OPENCV_CUDA_IMPLEMENT_RGB2HSV_TRAITS(rgb_to_hsv4, 3, 4, 2) + OPENCV_CUDA_IMPLEMENT_RGB2HSV_TRAITS(rgba_to_hsv4, 4, 4, 2) + OPENCV_CUDA_IMPLEMENT_RGB2HSV_TRAITS(bgr_to_hsv, 3, 3, 0) + OPENCV_CUDA_IMPLEMENT_RGB2HSV_TRAITS(bgra_to_hsv, 4, 3, 0) + OPENCV_CUDA_IMPLEMENT_RGB2HSV_TRAITS(bgr_to_hsv4, 3, 4, 0) + OPENCV_CUDA_IMPLEMENT_RGB2HSV_TRAITS(bgra_to_hsv4, 4, 4, 0) + + #undef OPENCV_CUDA_IMPLEMENT_RGB2HSV_TRAITS + + OPENCV_CUDA_IMPLEMENT_HSV2RGB_TRAITS(hsv_to_rgb, 3, 3, 2) + OPENCV_CUDA_IMPLEMENT_HSV2RGB_TRAITS(hsv_to_rgba, 3, 4, 2) + OPENCV_CUDA_IMPLEMENT_HSV2RGB_TRAITS(hsv4_to_rgb, 4, 3, 2) + OPENCV_CUDA_IMPLEMENT_HSV2RGB_TRAITS(hsv4_to_rgba, 4, 4, 2) + OPENCV_CUDA_IMPLEMENT_HSV2RGB_TRAITS(hsv_to_bgr, 3, 3, 0) + OPENCV_CUDA_IMPLEMENT_HSV2RGB_TRAITS(hsv_to_bgra, 3, 4, 0) + OPENCV_CUDA_IMPLEMENT_HSV2RGB_TRAITS(hsv4_to_bgr, 4, 3, 0) + OPENCV_CUDA_IMPLEMENT_HSV2RGB_TRAITS(hsv4_to_bgra, 4, 4, 0) + + #undef OPENCV_CUDA_IMPLEMENT_HSV2RGB_TRAITS + + OPENCV_CUDA_IMPLEMENT_RGB2HLS_TRAITS(rgb_to_hls, 3, 3, 2) + OPENCV_CUDA_IMPLEMENT_RGB2HLS_TRAITS(rgba_to_hls, 4, 3, 2) + OPENCV_CUDA_IMPLEMENT_RGB2HLS_TRAITS(rgb_to_hls4, 3, 4, 2) + OPENCV_CUDA_IMPLEMENT_RGB2HLS_TRAITS(rgba_to_hls4, 4, 4, 2) + OPENCV_CUDA_IMPLEMENT_RGB2HLS_TRAITS(bgr_to_hls, 3, 3, 0) + OPENCV_CUDA_IMPLEMENT_RGB2HLS_TRAITS(bgra_to_hls, 4, 3, 0) + OPENCV_CUDA_IMPLEMENT_RGB2HLS_TRAITS(bgr_to_hls4, 3, 4, 0) + OPENCV_CUDA_IMPLEMENT_RGB2HLS_TRAITS(bgra_to_hls4, 4, 4, 0) + + #undef OPENCV_CUDA_IMPLEMENT_RGB2HLS_TRAITS + + OPENCV_CUDA_IMPLEMENT_HLS2RGB_TRAITS(hls_to_rgb, 3, 3, 2) + OPENCV_CUDA_IMPLEMENT_HLS2RGB_TRAITS(hls_to_rgba, 3, 4, 2) + OPENCV_CUDA_IMPLEMENT_HLS2RGB_TRAITS(hls4_to_rgb, 4, 3, 2) + OPENCV_CUDA_IMPLEMENT_HLS2RGB_TRAITS(hls4_to_rgba, 4, 4, 2) + OPENCV_CUDA_IMPLEMENT_HLS2RGB_TRAITS(hls_to_bgr, 3, 3, 0) + OPENCV_CUDA_IMPLEMENT_HLS2RGB_TRAITS(hls_to_bgra, 3, 4, 0) + OPENCV_CUDA_IMPLEMENT_HLS2RGB_TRAITS(hls4_to_bgr, 4, 3, 0) + OPENCV_CUDA_IMPLEMENT_HLS2RGB_TRAITS(hls4_to_bgra, 4, 4, 0) + + #undef OPENCV_CUDA_IMPLEMENT_HLS2RGB_TRAITS + + OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(rgb_to_lab, 3, 3, true, 2) + OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(rgba_to_lab, 4, 3, true, 2) + OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(rgb_to_lab4, 3, 4, true, 2) + OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(rgba_to_lab4, 4, 4, true, 2) + OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(bgr_to_lab, 3, 3, true, 0) + OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(bgra_to_lab, 4, 3, true, 0) + OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(bgr_to_lab4, 3, 4, true, 0) + OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(bgra_to_lab4, 4, 4, true, 0) + + OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(lrgb_to_lab, 3, 3, false, 2) + OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(lrgba_to_lab, 4, 3, false, 2) + OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(lrgb_to_lab4, 3, 4, false, 2) + OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(lrgba_to_lab4, 4, 4, false, 2) + OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(lbgr_to_lab, 3, 3, false, 0) + OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(lbgra_to_lab, 4, 3, false, 0) + OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(lbgr_to_lab4, 3, 4, false, 0) + OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(lbgra_to_lab4, 4, 4, false, 0) + + #undef OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS + + OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab_to_rgb, 3, 3, true, 2) + OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab4_to_rgb, 4, 3, true, 2) + OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab_to_rgba, 3, 4, true, 2) + OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab4_to_rgba, 4, 4, true, 2) + OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab_to_bgr, 3, 3, true, 0) + OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab4_to_bgr, 4, 3, true, 0) + OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab_to_bgra, 3, 4, true, 0) + OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab4_to_bgra, 4, 4, true, 0) + + OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab_to_lrgb, 3, 3, false, 2) + OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab4_to_lrgb, 4, 3, false, 2) + OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab_to_lrgba, 3, 4, false, 2) + OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab4_to_lrgba, 4, 4, false, 2) + OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab_to_lbgr, 3, 3, false, 0) + OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab4_to_lbgr, 4, 3, false, 0) + OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab_to_lbgra, 3, 4, false, 0) + OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab4_to_lbgra, 4, 4, false, 0) + + #undef OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS + + OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(rgb_to_luv, 3, 3, true, 2) + OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(rgba_to_luv, 4, 3, true, 2) + OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(rgb_to_luv4, 3, 4, true, 2) + OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(rgba_to_luv4, 4, 4, true, 2) + OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(bgr_to_luv, 3, 3, true, 0) + OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(bgra_to_luv, 4, 3, true, 0) + OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(bgr_to_luv4, 3, 4, true, 0) + OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(bgra_to_luv4, 4, 4, true, 0) + + OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(lrgb_to_luv, 3, 3, false, 2) + OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(lrgba_to_luv, 4, 3, false, 2) + OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(lrgb_to_luv4, 3, 4, false, 2) + OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(lrgba_to_luv4, 4, 4, false, 2) + OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(lbgr_to_luv, 3, 3, false, 0) + OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(lbgra_to_luv, 4, 3, false, 0) + OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(lbgr_to_luv4, 3, 4, false, 0) + OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(lbgra_to_luv4, 4, 4, false, 0) + + #undef OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS + + OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv_to_rgb, 3, 3, true, 2) + OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv4_to_rgb, 4, 3, true, 2) + OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv_to_rgba, 3, 4, true, 2) + OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv4_to_rgba, 4, 4, true, 2) + OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv_to_bgr, 3, 3, true, 0) + OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv4_to_bgr, 4, 3, true, 0) + OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv_to_bgra, 3, 4, true, 0) + OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv4_to_bgra, 4, 4, true, 0) + + OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv_to_lrgb, 3, 3, false, 2) + OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv4_to_lrgb, 4, 3, false, 2) + OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv_to_lrgba, 3, 4, false, 2) + OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv4_to_lrgba, 4, 4, false, 2) + OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv_to_lbgr, 3, 3, false, 0) + OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv4_to_lbgr, 4, 3, false, 0) + OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv_to_lbgra, 3, 4, false, 0) + OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv4_to_lbgra, 4, 4, false, 0) + + #undef OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS +}}} // namespace cv { namespace cuda { namespace cudev + +//! @endcond + +#endif // OPENCV_CUDA_COLOR_HPP diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/cuda/common.hpp b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/cuda/common.hpp new file mode 100644 index 0000000..80b2ff0 --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/cuda/common.hpp @@ -0,0 +1,123 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef OPENCV_CUDA_COMMON_HPP +#define OPENCV_CUDA_COMMON_HPP + +#include +#include "opencv2/core/cuda_types.hpp" +#include "opencv2/core/cvdef.h" +#include "opencv2/core/base.hpp" + +/** @file + * @deprecated Use @ref cudev instead. + */ + +//! @cond IGNORED + +#ifndef CV_PI_F + #ifndef CV_PI + #define CV_PI_F 3.14159265f + #else + #define CV_PI_F ((float)CV_PI) + #endif +#endif + +namespace cv { namespace cuda { + static inline void checkCudaError(cudaError_t err, const char* file, const int line, const char* func) + { + if (cudaSuccess != err) + cv::error(cv::Error::GpuApiCallError, cudaGetErrorString(err), func, file, line); + } +}} + +#ifndef cudaSafeCall + #define cudaSafeCall(expr) cv::cuda::checkCudaError(expr, __FILE__, __LINE__, CV_Func) +#endif + +namespace cv { namespace cuda +{ + template static inline bool isAligned(const T* ptr, size_t size) + { + return reinterpret_cast(ptr) % size == 0; + } + + static inline bool isAligned(size_t step, size_t size) + { + return step % size == 0; + } +}} + +namespace cv { namespace cuda +{ + namespace device + { + __host__ __device__ __forceinline__ int divUp(int total, int grain) + { + return (total + grain - 1) / grain; + } + + template inline void bindTexture(const textureReference* tex, const PtrStepSz& img) + { + cudaChannelFormatDesc desc = cudaCreateChannelDesc(); + cudaSafeCall( cudaBindTexture2D(0, tex, img.ptr(), &desc, img.cols, img.rows, img.step) ); + } + + template inline void createTextureObjectPitch2D(cudaTextureObject_t* tex, PtrStepSz& img, const cudaTextureDesc& texDesc) + { + cudaResourceDesc resDesc; + memset(&resDesc, 0, sizeof(resDesc)); + resDesc.resType = cudaResourceTypePitch2D; + resDesc.res.pitch2D.devPtr = static_cast(img.ptr()); + resDesc.res.pitch2D.height = img.rows; + resDesc.res.pitch2D.width = img.cols; + resDesc.res.pitch2D.pitchInBytes = img.step; + resDesc.res.pitch2D.desc = cudaCreateChannelDesc(); + + cudaSafeCall( cudaCreateTextureObject(tex, &resDesc, &texDesc, NULL) ); + } + } +}} + +//! @endcond + +#endif // OPENCV_CUDA_COMMON_HPP diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/cuda/datamov_utils.hpp b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/cuda/datamov_utils.hpp new file mode 100644 index 0000000..6820d0f --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/cuda/datamov_utils.hpp @@ -0,0 +1,113 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef OPENCV_CUDA_DATAMOV_UTILS_HPP +#define OPENCV_CUDA_DATAMOV_UTILS_HPP + +#include "common.hpp" + +/** @file + * @deprecated Use @ref cudev instead. + */ + +//! @cond IGNORED + +namespace cv { namespace cuda { namespace device +{ + #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 200 + + // for Fermi memory space is detected automatically + template struct ForceGlob + { + __device__ __forceinline__ static void Load(const T* ptr, int offset, T& val) { val = ptr[offset]; } + }; + + #else // __CUDA_ARCH__ >= 200 + + #if defined(_WIN64) || defined(__LP64__) + // 64-bit register modifier for inlined asm + #define OPENCV_CUDA_ASM_PTR "l" + #else + // 32-bit register modifier for inlined asm + #define OPENCV_CUDA_ASM_PTR "r" + #endif + + template struct ForceGlob; + + #define OPENCV_CUDA_DEFINE_FORCE_GLOB(base_type, ptx_type, reg_mod) \ + template <> struct ForceGlob \ + { \ + __device__ __forceinline__ static void Load(const base_type* ptr, int offset, base_type& val) \ + { \ + asm("ld.global."#ptx_type" %0, [%1];" : "="#reg_mod(val) : OPENCV_CUDA_ASM_PTR(ptr + offset)); \ + } \ + }; + + #define OPENCV_CUDA_DEFINE_FORCE_GLOB_B(base_type, ptx_type) \ + template <> struct ForceGlob \ + { \ + __device__ __forceinline__ static void Load(const base_type* ptr, int offset, base_type& val) \ + { \ + asm("ld.global."#ptx_type" %0, [%1];" : "=r"(*reinterpret_cast(&val)) : OPENCV_CUDA_ASM_PTR(ptr + offset)); \ + } \ + }; + + OPENCV_CUDA_DEFINE_FORCE_GLOB_B(uchar, u8) + OPENCV_CUDA_DEFINE_FORCE_GLOB_B(schar, s8) + OPENCV_CUDA_DEFINE_FORCE_GLOB_B(char, b8) + OPENCV_CUDA_DEFINE_FORCE_GLOB (ushort, u16, h) + OPENCV_CUDA_DEFINE_FORCE_GLOB (short, s16, h) + OPENCV_CUDA_DEFINE_FORCE_GLOB (uint, u32, r) + OPENCV_CUDA_DEFINE_FORCE_GLOB (int, s32, r) + OPENCV_CUDA_DEFINE_FORCE_GLOB (float, f32, f) + OPENCV_CUDA_DEFINE_FORCE_GLOB (double, f64, d) + + #undef OPENCV_CUDA_DEFINE_FORCE_GLOB + #undef OPENCV_CUDA_DEFINE_FORCE_GLOB_B + #undef OPENCV_CUDA_ASM_PTR + + #endif // __CUDA_ARCH__ >= 200 +}}} // namespace cv { namespace cuda { namespace cudev + +//! @endcond + +#endif // OPENCV_CUDA_DATAMOV_UTILS_HPP diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/cuda/detail/color_detail.hpp b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/cuda/detail/color_detail.hpp new file mode 100644 index 0000000..f4b4796 --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/cuda/detail/color_detail.hpp @@ -0,0 +1,2018 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef OPENCV_CUDA_COLOR_DETAIL_HPP +#define OPENCV_CUDA_COLOR_DETAIL_HPP + +#include "../common.hpp" +#include "../vec_traits.hpp" +#include "../saturate_cast.hpp" +#include "../limits.hpp" +#include "../functional.hpp" + +//! @cond IGNORED + +namespace cv { namespace cuda { namespace device +{ + #ifndef CV_DESCALE + #define CV_DESCALE(x, n) (((x) + (1 << ((n)-1))) >> (n)) + #endif + + namespace color_detail + { + template struct ColorChannel + { + typedef float worktype_f; + static __device__ __forceinline__ T max() { return numeric_limits::max(); } + static __device__ __forceinline__ T half() { return (T)(max()/2 + 1); } + }; + + template<> struct ColorChannel + { + typedef float worktype_f; + static __device__ __forceinline__ float max() { return 1.f; } + static __device__ __forceinline__ float half() { return 0.5f; } + }; + + template static __device__ __forceinline__ void setAlpha(typename TypeVec::vec_type& vec, T val) + { + } + + template static __device__ __forceinline__ void setAlpha(typename TypeVec::vec_type& vec, T val) + { + vec.w = val; + } + + template static __device__ __forceinline__ T getAlpha(const typename TypeVec::vec_type& vec) + { + return ColorChannel::max(); + } + + template static __device__ __forceinline__ T getAlpha(const typename TypeVec::vec_type& vec) + { + return vec.w; + } + + //constants for conversion from/to RGB and Gray, YUV, YCrCb according to BT.601 + constexpr float B2YF = 0.114f; + constexpr float G2YF = 0.587f; + constexpr float R2YF = 0.299f; + + //to YCbCr + constexpr float YCBF = 0.564f; // == 1/2/(1-B2YF) + constexpr float YCRF = 0.713f; // == 1/2/(1-R2YF) + const int YCBI = 9241; // == YCBF*16384 + const int YCRI = 11682; // == YCRF*16384 + //to YUV + constexpr float B2UF = 0.492f; + constexpr float R2VF = 0.877f; + const int B2UI = 8061; // == B2UF*16384 + const int R2VI = 14369; // == R2VF*16384 + //from YUV + constexpr float U2BF = 2.032f; + constexpr float U2GF = -0.395f; + constexpr float V2GF = -0.581f; + constexpr float V2RF = 1.140f; + const int U2BI = 33292; + const int U2GI = -6472; + const int V2GI = -9519; + const int V2RI = 18678; + //from YCrCb + constexpr float CB2BF = 1.773f; + constexpr float CB2GF = -0.344f; + constexpr float CR2GF = -0.714f; + constexpr float CR2RF = 1.403f; + const int CB2BI = 29049; + const int CB2GI = -5636; + const int CR2GI = -11698; + const int CR2RI = 22987; + + enum + { + yuv_shift = 14, + xyz_shift = 12, + gray_shift = 15, + R2Y = 4899, + G2Y = 9617, + B2Y = 1868, + RY15 = 9798, // == R2YF*32768 + 0.5 + GY15 = 19235, // == G2YF*32768 + 0.5 + BY15 = 3735, // == B2YF*32768 + 0.5 + BLOCK_SIZE = 256 + }; + } + +////////////////// Various 3/4-channel to 3/4-channel RGB transformations ///////////////// + + namespace color_detail + { + template struct RGB2RGB + : unary_function::vec_type, typename TypeVec::vec_type> + { + __device__ typename TypeVec::vec_type operator()(const typename TypeVec::vec_type& src) const + { + typename TypeVec::vec_type dst; + + dst.x = (&src.x)[bidx]; + dst.y = src.y; + dst.z = (&src.x)[bidx^2]; + setAlpha(dst, getAlpha(src)); + + return dst; + } + + __host__ __device__ __forceinline__ RGB2RGB() {} + __host__ __device__ __forceinline__ RGB2RGB(const RGB2RGB&) {} + }; + + template <> struct RGB2RGB : unary_function + { + __device__ uint operator()(uint src) const + { + uint dst = 0; + + dst |= (0xffu & (src >> 16)); + dst |= (0xffu & (src >> 8)) << 8; + dst |= (0xffu & (src)) << 16; + dst |= (0xffu & (src >> 24)) << 24; + + return dst; + } + + __host__ __device__ __forceinline__ RGB2RGB() {} + __host__ __device__ __forceinline__ RGB2RGB(const RGB2RGB&) {} + }; + } + +#define OPENCV_CUDA_IMPLEMENT_RGB2RGB_TRAITS(name, scn, dcn, bidx) \ + template struct name ## _traits \ + { \ + typedef ::cv::cuda::device::color_detail::RGB2RGB functor_type; \ + static __host__ __device__ __forceinline__ functor_type create_functor() \ + { \ + return functor_type(); \ + } \ + }; + +/////////// Transforming 16-bit (565 or 555) RGB to/from 24/32-bit (888[8]) RGB ////////// + + namespace color_detail + { + template struct RGB2RGB5x5Converter; + template struct RGB2RGB5x5Converter<6, bidx> + { + static __device__ __forceinline__ ushort cvt(const uchar3& src) + { + return (ushort)(((&src.x)[bidx] >> 3) | ((src.y & ~3) << 3) | (((&src.x)[bidx^2] & ~7) << 8)); + } + + static __device__ __forceinline__ ushort cvt(uint src) + { + uint b = 0xffu & (src >> (bidx * 8)); + uint g = 0xffu & (src >> 8); + uint r = 0xffu & (src >> ((bidx ^ 2) * 8)); + return (ushort)((b >> 3) | ((g & ~3) << 3) | ((r & ~7) << 8)); + } + }; + + template struct RGB2RGB5x5Converter<5, bidx> + { + static __device__ __forceinline__ ushort cvt(const uchar3& src) + { + return (ushort)(((&src.x)[bidx] >> 3) | ((src.y & ~7) << 2) | (((&src.x)[bidx^2] & ~7) << 7)); + } + + static __device__ __forceinline__ ushort cvt(uint src) + { + uint b = 0xffu & (src >> (bidx * 8)); + uint g = 0xffu & (src >> 8); + uint r = 0xffu & (src >> ((bidx ^ 2) * 8)); + uint a = 0xffu & (src >> 24); + return (ushort)((b >> 3) | ((g & ~7) << 2) | ((r & ~7) << 7) | (a * 0x8000)); + } + }; + + template struct RGB2RGB5x5; + + template struct RGB2RGB5x5<3, bidx,green_bits> : unary_function + { + __device__ __forceinline__ ushort operator()(const uchar3& src) const + { + return RGB2RGB5x5Converter::cvt(src); + } + + __host__ __device__ __forceinline__ RGB2RGB5x5() {} + __host__ __device__ __forceinline__ RGB2RGB5x5(const RGB2RGB5x5&) {} + }; + + template struct RGB2RGB5x5<4, bidx,green_bits> : unary_function + { + __device__ __forceinline__ ushort operator()(uint src) const + { + return RGB2RGB5x5Converter::cvt(src); + } + + __host__ __device__ __forceinline__ RGB2RGB5x5() {} + __host__ __device__ __forceinline__ RGB2RGB5x5(const RGB2RGB5x5&) {} + }; + } + +#define OPENCV_CUDA_IMPLEMENT_RGB2RGB5x5_TRAITS(name, scn, bidx, green_bits) \ + struct name ## _traits \ + { \ + typedef ::cv::cuda::device::color_detail::RGB2RGB5x5 functor_type; \ + static __host__ __device__ __forceinline__ functor_type create_functor() \ + { \ + return functor_type(); \ + } \ + }; + + namespace color_detail + { + template struct RGB5x52RGBConverter; + + template struct RGB5x52RGBConverter<5, bidx> + { + static __device__ __forceinline__ void cvt(uint src, uchar3& dst) + { + (&dst.x)[bidx] = src << 3; + dst.y = (src >> 2) & ~7; + (&dst.x)[bidx ^ 2] = (src >> 7) & ~7; + } + + static __device__ __forceinline__ void cvt(uint src, uint& dst) + { + dst = 0; + + dst |= (0xffu & (src << 3)) << (bidx * 8); + dst |= (0xffu & ((src >> 2) & ~7)) << 8; + dst |= (0xffu & ((src >> 7) & ~7)) << ((bidx ^ 2) * 8); + dst |= ((src & 0x8000) * 0xffu) << 24; + } + }; + + template struct RGB5x52RGBConverter<6, bidx> + { + static __device__ __forceinline__ void cvt(uint src, uchar3& dst) + { + (&dst.x)[bidx] = src << 3; + dst.y = (src >> 3) & ~3; + (&dst.x)[bidx ^ 2] = (src >> 8) & ~7; + } + + static __device__ __forceinline__ void cvt(uint src, uint& dst) + { + dst = 0xffu << 24; + + dst |= (0xffu & (src << 3)) << (bidx * 8); + dst |= (0xffu &((src >> 3) & ~3)) << 8; + dst |= (0xffu & ((src >> 8) & ~7)) << ((bidx ^ 2) * 8); + } + }; + + template struct RGB5x52RGB; + + template struct RGB5x52RGB<3, bidx, green_bits> : unary_function + { + __device__ __forceinline__ uchar3 operator()(ushort src) const + { + uchar3 dst; + RGB5x52RGBConverter::cvt(src, dst); + return dst; + } + __host__ __device__ __forceinline__ RGB5x52RGB() {} + __host__ __device__ __forceinline__ RGB5x52RGB(const RGB5x52RGB&) {} + + }; + + template struct RGB5x52RGB<4, bidx, green_bits> : unary_function + { + __device__ __forceinline__ uint operator()(ushort src) const + { + uint dst; + RGB5x52RGBConverter::cvt(src, dst); + return dst; + } + __host__ __device__ __forceinline__ RGB5x52RGB() {} + __host__ __device__ __forceinline__ RGB5x52RGB(const RGB5x52RGB&) {} + }; + } + +#define OPENCV_CUDA_IMPLEMENT_RGB5x52RGB_TRAITS(name, dcn, bidx, green_bits) \ + struct name ## _traits \ + { \ + typedef ::cv::cuda::device::color_detail::RGB5x52RGB functor_type; \ + static __host__ __device__ __forceinline__ functor_type create_functor() \ + { \ + return functor_type(); \ + } \ + }; + +///////////////////////////////// Grayscale to Color //////////////////////////////// + + namespace color_detail + { + template struct Gray2RGB : unary_function::vec_type> + { + __device__ __forceinline__ typename TypeVec::vec_type operator()(T src) const + { + typename TypeVec::vec_type dst; + + dst.z = dst.y = dst.x = src; + setAlpha(dst, ColorChannel::max()); + + return dst; + } + __host__ __device__ __forceinline__ Gray2RGB() {} + __host__ __device__ __forceinline__ Gray2RGB(const Gray2RGB&) {} + }; + + template <> struct Gray2RGB : unary_function + { + __device__ __forceinline__ uint operator()(uint src) const + { + uint dst = 0xffu << 24; + + dst |= src; + dst |= src << 8; + dst |= src << 16; + + return dst; + } + __host__ __device__ __forceinline__ Gray2RGB() {} + __host__ __device__ __forceinline__ Gray2RGB(const Gray2RGB&) {} + }; + } + +#define OPENCV_CUDA_IMPLEMENT_GRAY2RGB_TRAITS(name, dcn) \ + template struct name ## _traits \ + { \ + typedef ::cv::cuda::device::color_detail::Gray2RGB functor_type; \ + static __host__ __device__ __forceinline__ functor_type create_functor() \ + { \ + return functor_type(); \ + } \ + }; + + namespace color_detail + { + template struct Gray2RGB5x5Converter; + template<> struct Gray2RGB5x5Converter<6> + { + static __device__ __forceinline__ ushort cvt(uint t) + { + return (ushort)((t >> 3) | ((t & ~3) << 3) | ((t & ~7) << 8)); + } + }; + + template<> struct Gray2RGB5x5Converter<5> + { + static __device__ __forceinline__ ushort cvt(uint t) + { + t >>= 3; + return (ushort)(t | (t << 5) | (t << 10)); + } + }; + + template struct Gray2RGB5x5 : unary_function + { + __device__ __forceinline__ ushort operator()(uint src) const + { + return Gray2RGB5x5Converter::cvt(src); + } + + __host__ __device__ __forceinline__ Gray2RGB5x5() {} + __host__ __device__ __forceinline__ Gray2RGB5x5(const Gray2RGB5x5&) {} + }; + } + +#define OPENCV_CUDA_IMPLEMENT_GRAY2RGB5x5_TRAITS(name, green_bits) \ + struct name ## _traits \ + { \ + typedef ::cv::cuda::device::color_detail::Gray2RGB5x5 functor_type; \ + static __host__ __device__ __forceinline__ functor_type create_functor() \ + { \ + return functor_type(); \ + } \ + }; + +///////////////////////////////// Color to Grayscale //////////////////////////////// + + namespace color_detail + { + template struct RGB5x52GrayConverter; + template <> struct RGB5x52GrayConverter<6> + { + static __device__ __forceinline__ uchar cvt(uint t) + { + return (uchar)CV_DESCALE(((t << 3) & 0xf8) * BY15 + ((t >> 3) & 0xfc) * GY15 + ((t >> 8) & 0xf8) * RY15, gray_shift); + } + }; + + template <> struct RGB5x52GrayConverter<5> + { + static __device__ __forceinline__ uchar cvt(uint t) + { + return (uchar)CV_DESCALE(((t << 3) & 0xf8) * BY15 + ((t >> 2) & 0xf8) * GY15 + ((t >> 7) & 0xf8) * RY15, gray_shift); + } + }; + + template struct RGB5x52Gray : unary_function + { + __device__ __forceinline__ uchar operator()(uint src) const + { + return RGB5x52GrayConverter::cvt(src); + } + __host__ __device__ __forceinline__ RGB5x52Gray() {} + __host__ __device__ __forceinline__ RGB5x52Gray(const RGB5x52Gray&) {} + }; + } + +#define OPENCV_CUDA_IMPLEMENT_RGB5x52GRAY_TRAITS(name, green_bits) \ + struct name ## _traits \ + { \ + typedef ::cv::cuda::device::color_detail::RGB5x52Gray functor_type; \ + static __host__ __device__ __forceinline__ functor_type create_functor() \ + { \ + return functor_type(); \ + } \ + }; + + namespace color_detail + { + template static __device__ __forceinline__ T RGB2GrayConvert(const T* src) + { + return (T)CV_DESCALE((unsigned)(src[bidx] * BY15 + src[1] * GY15 + src[bidx^2] * RY15), gray_shift); + } + + template static __device__ __forceinline__ uchar RGB2GrayConvert(uint src) + { + uint b = 0xffu & (src >> (bidx * 8)); + uint g = 0xffu & (src >> 8); + uint r = 0xffu & (src >> ((bidx ^ 2) * 8)); + return CV_DESCALE((uint)(b * BY15 + g * GY15 + r * RY15), gray_shift); + } + + template static __device__ __forceinline__ float RGB2GrayConvert(const float* src) + { + return src[bidx] * B2YF + src[1] * G2YF + src[bidx^2] * R2YF; + } + + template struct RGB2Gray : unary_function::vec_type, T> + { + __device__ __forceinline__ T operator()(const typename TypeVec::vec_type& src) const + { + return RGB2GrayConvert(&src.x); + } + __host__ __device__ __forceinline__ RGB2Gray() {} + __host__ __device__ __forceinline__ RGB2Gray(const RGB2Gray&) {} + }; + + template struct RGB2Gray : unary_function + { + __device__ __forceinline__ uchar operator()(uint src) const + { + return RGB2GrayConvert(src); + } + __host__ __device__ __forceinline__ RGB2Gray() {} + __host__ __device__ __forceinline__ RGB2Gray(const RGB2Gray&) {} + }; + } + +#define OPENCV_CUDA_IMPLEMENT_RGB2GRAY_TRAITS(name, scn, bidx) \ + template struct name ## _traits \ + { \ + typedef ::cv::cuda::device::color_detail::RGB2Gray functor_type; \ + static __host__ __device__ __forceinline__ functor_type create_functor() \ + { \ + return functor_type(); \ + } \ + }; + +///////////////////////////////////// RGB <-> YUV ////////////////////////////////////// + + namespace color_detail + { + __constant__ float c_RGB2YUVCoeffs_f[5] = { B2YF, G2YF, R2YF, B2UF, R2VF }; + __constant__ int c_RGB2YUVCoeffs_i[5] = { B2Y, G2Y, R2Y, B2UI, R2VI }; + + template static __device__ void RGB2YUVConvert(const T* src, D& dst) + { + const int delta = ColorChannel::half() * (1 << yuv_shift); + + const int Y = CV_DESCALE(src[0] * c_RGB2YUVCoeffs_i[bidx^2] + src[1] * c_RGB2YUVCoeffs_i[1] + src[2] * c_RGB2YUVCoeffs_i[bidx], yuv_shift); + const int Cr = CV_DESCALE((src[bidx^2] - Y) * c_RGB2YUVCoeffs_i[3] + delta, yuv_shift); + const int Cb = CV_DESCALE((src[bidx] - Y) * c_RGB2YUVCoeffs_i[4] + delta, yuv_shift); + + dst.x = saturate_cast(Y); + dst.y = saturate_cast(Cr); + dst.z = saturate_cast(Cb); + } + + template static __device__ __forceinline__ void RGB2YUVConvert(const float* src, D& dst) + { + dst.x = src[0] * c_RGB2YUVCoeffs_f[bidx^2] + src[1] * c_RGB2YUVCoeffs_f[1] + src[2] * c_RGB2YUVCoeffs_f[bidx]; + dst.y = (src[bidx^2] - dst.x) * c_RGB2YUVCoeffs_f[3] + ColorChannel::half(); + dst.z = (src[bidx] - dst.x) * c_RGB2YUVCoeffs_f[4] + ColorChannel::half(); + } + + template struct RGB2YUV + : unary_function::vec_type, typename TypeVec::vec_type> + { + __device__ __forceinline__ typename TypeVec::vec_type operator ()(const typename TypeVec::vec_type& src) const + { + typename TypeVec::vec_type dst; + RGB2YUVConvert(&src.x, dst); + return dst; + } + __host__ __device__ __forceinline__ RGB2YUV() {} + __host__ __device__ __forceinline__ RGB2YUV(const RGB2YUV&) {} + }; + } + +#define OPENCV_CUDA_IMPLEMENT_RGB2YUV_TRAITS(name, scn, dcn, bidx) \ + template struct name ## _traits \ + { \ + typedef ::cv::cuda::device::color_detail::RGB2YUV functor_type; \ + static __host__ __device__ __forceinline__ functor_type create_functor() \ + { \ + return functor_type(); \ + } \ + }; + + namespace color_detail + { + __constant__ float c_YUV2RGBCoeffs_f[5] = { U2BF, U2GF, V2GF, V2RF }; + __constant__ int c_YUV2RGBCoeffs_i[5] = { U2BI, U2GI, V2GI, V2RI }; + + template static __device__ void YUV2RGBConvert(const T& src, D* dst) + { + const int b = src.x + CV_DESCALE((src.z - ColorChannel::half()) * c_YUV2RGBCoeffs_i[3], yuv_shift); + + const int g = src.x + CV_DESCALE((src.z - ColorChannel::half()) * c_YUV2RGBCoeffs_i[2] + + (src.y - ColorChannel::half()) * c_YUV2RGBCoeffs_i[1], yuv_shift); + + const int r = src.x + CV_DESCALE((src.y - ColorChannel::half()) * c_YUV2RGBCoeffs_i[0], yuv_shift); + + dst[bidx] = saturate_cast(b); + dst[1] = saturate_cast(g); + dst[bidx^2] = saturate_cast(r); + } + + template static __device__ uint YUV2RGBConvert(uint src) + { + const int x = 0xff & (src); + const int y = 0xff & (src >> 8); + const int z = 0xff & (src >> 16); + + const int b = x + CV_DESCALE((z - ColorChannel::half()) * c_YUV2RGBCoeffs_i[3], yuv_shift); + + const int g = x + CV_DESCALE((z - ColorChannel::half()) * c_YUV2RGBCoeffs_i[2] + + (y - ColorChannel::half()) * c_YUV2RGBCoeffs_i[1], yuv_shift); + + const int r = x + CV_DESCALE((y - ColorChannel::half()) * c_YUV2RGBCoeffs_i[0], yuv_shift); + + uint dst = 0xffu << 24; + + dst |= saturate_cast(b) << (bidx * 8); + dst |= saturate_cast(g) << 8; + dst |= saturate_cast(r) << ((bidx ^ 2) * 8); + + return dst; + } + + template static __device__ __forceinline__ void YUV2RGBConvert(const T& src, float* dst) + { + dst[bidx] = src.x + (src.z - ColorChannel::half()) * c_YUV2RGBCoeffs_f[3]; + + dst[1] = src.x + (src.z - ColorChannel::half()) * c_YUV2RGBCoeffs_f[2] + + (src.y - ColorChannel::half()) * c_YUV2RGBCoeffs_f[1]; + + dst[bidx^2] = src.x + (src.y - ColorChannel::half()) * c_YUV2RGBCoeffs_f[0]; + } + + template struct YUV2RGB + : unary_function::vec_type, typename TypeVec::vec_type> + { + __device__ __forceinline__ typename TypeVec::vec_type operator ()(const typename TypeVec::vec_type& src) const + { + typename TypeVec::vec_type dst; + + YUV2RGBConvert(src, &dst.x); + setAlpha(dst, ColorChannel::max()); + + return dst; + } + __host__ __device__ __forceinline__ YUV2RGB() {} + __host__ __device__ __forceinline__ YUV2RGB(const YUV2RGB&) {} + }; + + template struct YUV2RGB : unary_function + { + __device__ __forceinline__ uint operator ()(uint src) const + { + return YUV2RGBConvert(src); + } + __host__ __device__ __forceinline__ YUV2RGB() {} + __host__ __device__ __forceinline__ YUV2RGB(const YUV2RGB&) {} + }; + } + +#define OPENCV_CUDA_IMPLEMENT_YUV2RGB_TRAITS(name, scn, dcn, bidx) \ + template struct name ## _traits \ + { \ + typedef ::cv::cuda::device::color_detail::YUV2RGB functor_type; \ + static __host__ __device__ __forceinline__ functor_type create_functor() \ + { \ + return functor_type(); \ + } \ + }; + +///////////////////////////////////// RGB <-> YCrCb ////////////////////////////////////// + + namespace color_detail + { + __constant__ float c_RGB2YCrCbCoeffs_f[5] = {R2YF, G2YF, B2YF, YCRF, YCBF}; + __constant__ int c_RGB2YCrCbCoeffs_i[5] = {R2Y, G2Y, B2Y, YCRI, YCBI}; + + template static __device__ void RGB2YCrCbConvert(const T* src, D& dst) + { + const int delta = ColorChannel::half() * (1 << yuv_shift); + + const int Y = CV_DESCALE(src[0] * c_RGB2YCrCbCoeffs_i[bidx^2] + src[1] * c_RGB2YCrCbCoeffs_i[1] + src[2] * c_RGB2YCrCbCoeffs_i[bidx], yuv_shift); + const int Cr = CV_DESCALE((src[bidx^2] - Y) * c_RGB2YCrCbCoeffs_i[3] + delta, yuv_shift); + const int Cb = CV_DESCALE((src[bidx] - Y) * c_RGB2YCrCbCoeffs_i[4] + delta, yuv_shift); + + dst.x = saturate_cast(Y); + dst.y = saturate_cast(Cr); + dst.z = saturate_cast(Cb); + } + + template static __device__ uint RGB2YCrCbConvert(uint src) + { + const int delta = ColorChannel::half() * (1 << yuv_shift); + + const int Y = CV_DESCALE((0xffu & src) * c_RGB2YCrCbCoeffs_i[bidx^2] + (0xffu & (src >> 8)) * c_RGB2YCrCbCoeffs_i[1] + (0xffu & (src >> 16)) * c_RGB2YCrCbCoeffs_i[bidx], yuv_shift); + const int Cr = CV_DESCALE(((0xffu & (src >> ((bidx ^ 2) * 8))) - Y) * c_RGB2YCrCbCoeffs_i[3] + delta, yuv_shift); + const int Cb = CV_DESCALE(((0xffu & (src >> (bidx * 8))) - Y) * c_RGB2YCrCbCoeffs_i[4] + delta, yuv_shift); + + uint dst = 0; + + dst |= saturate_cast(Y); + dst |= saturate_cast(Cr) << 8; + dst |= saturate_cast(Cb) << 16; + + return dst; + } + + template static __device__ __forceinline__ void RGB2YCrCbConvert(const float* src, D& dst) + { + dst.x = src[0] * c_RGB2YCrCbCoeffs_f[bidx^2] + src[1] * c_RGB2YCrCbCoeffs_f[1] + src[2] * c_RGB2YCrCbCoeffs_f[bidx]; + dst.y = (src[bidx^2] - dst.x) * c_RGB2YCrCbCoeffs_f[3] + ColorChannel::half(); + dst.z = (src[bidx] - dst.x) * c_RGB2YCrCbCoeffs_f[4] + ColorChannel::half(); + } + + template struct RGB2YCrCb + : unary_function::vec_type, typename TypeVec::vec_type> + { + __device__ __forceinline__ typename TypeVec::vec_type operator ()(const typename TypeVec::vec_type& src) const + { + typename TypeVec::vec_type dst; + RGB2YCrCbConvert(&src.x, dst); + return dst; + } + __host__ __device__ __forceinline__ RGB2YCrCb() {} + __host__ __device__ __forceinline__ RGB2YCrCb(const RGB2YCrCb&) {} + }; + + template struct RGB2YCrCb : unary_function + { + __device__ __forceinline__ uint operator ()(uint src) const + { + return RGB2YCrCbConvert(src); + } + + __host__ __device__ __forceinline__ RGB2YCrCb() {} + __host__ __device__ __forceinline__ RGB2YCrCb(const RGB2YCrCb&) {} + }; + } + +#define OPENCV_CUDA_IMPLEMENT_RGB2YCrCb_TRAITS(name, scn, dcn, bidx) \ + template struct name ## _traits \ + { \ + typedef ::cv::cuda::device::color_detail::RGB2YCrCb functor_type; \ + static __host__ __device__ __forceinline__ functor_type create_functor() \ + { \ + return functor_type(); \ + } \ + }; + + namespace color_detail + { + __constant__ float c_YCrCb2RGBCoeffs_f[5] = {CR2RF, CR2GF, CB2GF, CB2BF}; + __constant__ int c_YCrCb2RGBCoeffs_i[5] = {CR2RI, CR2GI, CB2GI, CB2BI}; + + template static __device__ void YCrCb2RGBConvert(const T& src, D* dst) + { + const int b = src.x + CV_DESCALE((src.z - ColorChannel::half()) * c_YCrCb2RGBCoeffs_i[3], yuv_shift); + const int g = src.x + CV_DESCALE((src.z - ColorChannel::half()) * c_YCrCb2RGBCoeffs_i[2] + (src.y - ColorChannel::half()) * c_YCrCb2RGBCoeffs_i[1], yuv_shift); + const int r = src.x + CV_DESCALE((src.y - ColorChannel::half()) * c_YCrCb2RGBCoeffs_i[0], yuv_shift); + + dst[bidx] = saturate_cast(b); + dst[1] = saturate_cast(g); + dst[bidx^2] = saturate_cast(r); + } + + template static __device__ uint YCrCb2RGBConvert(uint src) + { + const int x = 0xff & (src); + const int y = 0xff & (src >> 8); + const int z = 0xff & (src >> 16); + + const int b = x + CV_DESCALE((z - ColorChannel::half()) * c_YCrCb2RGBCoeffs_i[3], yuv_shift); + const int g = x + CV_DESCALE((z - ColorChannel::half()) * c_YCrCb2RGBCoeffs_i[2] + (y - ColorChannel::half()) * c_YCrCb2RGBCoeffs_i[1], yuv_shift); + const int r = x + CV_DESCALE((y - ColorChannel::half()) * c_YCrCb2RGBCoeffs_i[0], yuv_shift); + + uint dst = 0xffu << 24; + + dst |= saturate_cast(b) << (bidx * 8); + dst |= saturate_cast(g) << 8; + dst |= saturate_cast(r) << ((bidx ^ 2) * 8); + + return dst; + } + + template __device__ __forceinline__ void YCrCb2RGBConvert(const T& src, float* dst) + { + dst[bidx] = src.x + (src.z - ColorChannel::half()) * c_YCrCb2RGBCoeffs_f[3]; + dst[1] = src.x + (src.z - ColorChannel::half()) * c_YCrCb2RGBCoeffs_f[2] + (src.y - ColorChannel::half()) * c_YCrCb2RGBCoeffs_f[1]; + dst[bidx^2] = src.x + (src.y - ColorChannel::half()) * c_YCrCb2RGBCoeffs_f[0]; + } + + template struct YCrCb2RGB + : unary_function::vec_type, typename TypeVec::vec_type> + { + __device__ __forceinline__ typename TypeVec::vec_type operator ()(const typename TypeVec::vec_type& src) const + { + typename TypeVec::vec_type dst; + + YCrCb2RGBConvert(src, &dst.x); + setAlpha(dst, ColorChannel::max()); + + return dst; + } + __host__ __device__ __forceinline__ YCrCb2RGB() {} + __host__ __device__ __forceinline__ YCrCb2RGB(const YCrCb2RGB&) {} + }; + + template struct YCrCb2RGB : unary_function + { + __device__ __forceinline__ uint operator ()(uint src) const + { + return YCrCb2RGBConvert(src); + } + __host__ __device__ __forceinline__ YCrCb2RGB() {} + __host__ __device__ __forceinline__ YCrCb2RGB(const YCrCb2RGB&) {} + }; + } + +#define OPENCV_CUDA_IMPLEMENT_YCrCb2RGB_TRAITS(name, scn, dcn, bidx) \ + template struct name ## _traits \ + { \ + typedef ::cv::cuda::device::color_detail::YCrCb2RGB functor_type; \ + static __host__ __device__ __forceinline__ functor_type create_functor() \ + { \ + return functor_type(); \ + } \ + }; + +////////////////////////////////////// RGB <-> XYZ /////////////////////////////////////// + + namespace color_detail + { + __constant__ float c_RGB2XYZ_D65f[9] = { 0.412453f, 0.357580f, 0.180423f, 0.212671f, 0.715160f, 0.072169f, 0.019334f, 0.119193f, 0.950227f }; + __constant__ int c_RGB2XYZ_D65i[9] = { 1689, 1465, 739, 871, 2929, 296, 79, 488, 3892 }; + + template static __device__ __forceinline__ void RGB2XYZConvert(const T* src, D& dst) + { + dst.z = saturate_cast(CV_DESCALE(src[bidx^2] * c_RGB2XYZ_D65i[6] + src[1] * c_RGB2XYZ_D65i[7] + src[bidx] * c_RGB2XYZ_D65i[8], xyz_shift)); + dst.x = saturate_cast(CV_DESCALE(src[bidx^2] * c_RGB2XYZ_D65i[0] + src[1] * c_RGB2XYZ_D65i[1] + src[bidx] * c_RGB2XYZ_D65i[2], xyz_shift)); + dst.y = saturate_cast(CV_DESCALE(src[bidx^2] * c_RGB2XYZ_D65i[3] + src[1] * c_RGB2XYZ_D65i[4] + src[bidx] * c_RGB2XYZ_D65i[5], xyz_shift)); + } + + template static __device__ __forceinline__ uint RGB2XYZConvert(uint src) + { + const uint b = 0xffu & (src >> (bidx * 8)); + const uint g = 0xffu & (src >> 8); + const uint r = 0xffu & (src >> ((bidx ^ 2) * 8)); + + const uint x = saturate_cast(CV_DESCALE(r * c_RGB2XYZ_D65i[0] + g * c_RGB2XYZ_D65i[1] + b * c_RGB2XYZ_D65i[2], xyz_shift)); + const uint y = saturate_cast(CV_DESCALE(r * c_RGB2XYZ_D65i[3] + g * c_RGB2XYZ_D65i[4] + b * c_RGB2XYZ_D65i[5], xyz_shift)); + const uint z = saturate_cast(CV_DESCALE(r * c_RGB2XYZ_D65i[6] + g * c_RGB2XYZ_D65i[7] + b * c_RGB2XYZ_D65i[8], xyz_shift)); + + uint dst = 0; + + dst |= x; + dst |= y << 8; + dst |= z << 16; + + return dst; + } + + template static __device__ __forceinline__ void RGB2XYZConvert(const float* src, D& dst) + { + dst.x = src[bidx^2] * c_RGB2XYZ_D65f[0] + src[1] * c_RGB2XYZ_D65f[1] + src[bidx] * c_RGB2XYZ_D65f[2]; + dst.y = src[bidx^2] * c_RGB2XYZ_D65f[3] + src[1] * c_RGB2XYZ_D65f[4] + src[bidx] * c_RGB2XYZ_D65f[5]; + dst.z = src[bidx^2] * c_RGB2XYZ_D65f[6] + src[1] * c_RGB2XYZ_D65f[7] + src[bidx] * c_RGB2XYZ_D65f[8]; + } + + template struct RGB2XYZ + : unary_function::vec_type, typename TypeVec::vec_type> + { + __device__ __forceinline__ typename TypeVec::vec_type operator()(const typename TypeVec::vec_type& src) const + { + typename TypeVec::vec_type dst; + + RGB2XYZConvert(&src.x, dst); + + return dst; + } + __host__ __device__ __forceinline__ RGB2XYZ() {} + __host__ __device__ __forceinline__ RGB2XYZ(const RGB2XYZ&) {} + }; + + template struct RGB2XYZ : unary_function + { + __device__ __forceinline__ uint operator()(uint src) const + { + return RGB2XYZConvert(src); + } + __host__ __device__ __forceinline__ RGB2XYZ() {} + __host__ __device__ __forceinline__ RGB2XYZ(const RGB2XYZ&) {} + }; + } + +#define OPENCV_CUDA_IMPLEMENT_RGB2XYZ_TRAITS(name, scn, dcn, bidx) \ + template struct name ## _traits \ + { \ + typedef ::cv::cuda::device::color_detail::RGB2XYZ functor_type; \ + static __host__ __device__ __forceinline__ functor_type create_functor() \ + { \ + return functor_type(); \ + } \ + }; + + namespace color_detail + { + __constant__ float c_XYZ2sRGB_D65f[9] = { 3.240479f, -1.53715f, -0.498535f, -0.969256f, 1.875991f, 0.041556f, 0.055648f, -0.204043f, 1.057311f }; + __constant__ int c_XYZ2sRGB_D65i[9] = { 13273, -6296, -2042, -3970, 7684, 170, 228, -836, 4331 }; + + template static __device__ __forceinline__ void XYZ2RGBConvert(const T& src, D* dst) + { + dst[bidx^2] = saturate_cast(CV_DESCALE(src.x * c_XYZ2sRGB_D65i[0] + src.y * c_XYZ2sRGB_D65i[1] + src.z * c_XYZ2sRGB_D65i[2], xyz_shift)); + dst[1] = saturate_cast(CV_DESCALE(src.x * c_XYZ2sRGB_D65i[3] + src.y * c_XYZ2sRGB_D65i[4] + src.z * c_XYZ2sRGB_D65i[5], xyz_shift)); + dst[bidx] = saturate_cast(CV_DESCALE(src.x * c_XYZ2sRGB_D65i[6] + src.y * c_XYZ2sRGB_D65i[7] + src.z * c_XYZ2sRGB_D65i[8], xyz_shift)); + } + + template static __device__ __forceinline__ uint XYZ2RGBConvert(uint src) + { + const int x = 0xff & src; + const int y = 0xff & (src >> 8); + const int z = 0xff & (src >> 16); + + const uint r = saturate_cast(CV_DESCALE(x * c_XYZ2sRGB_D65i[0] + y * c_XYZ2sRGB_D65i[1] + z * c_XYZ2sRGB_D65i[2], xyz_shift)); + const uint g = saturate_cast(CV_DESCALE(x * c_XYZ2sRGB_D65i[3] + y * c_XYZ2sRGB_D65i[4] + z * c_XYZ2sRGB_D65i[5], xyz_shift)); + const uint b = saturate_cast(CV_DESCALE(x * c_XYZ2sRGB_D65i[6] + y * c_XYZ2sRGB_D65i[7] + z * c_XYZ2sRGB_D65i[8], xyz_shift)); + + uint dst = 0xffu << 24; + + dst |= b << (bidx * 8); + dst |= g << 8; + dst |= r << ((bidx ^ 2) * 8); + + return dst; + } + + template static __device__ __forceinline__ void XYZ2RGBConvert(const T& src, float* dst) + { + dst[bidx^2] = src.x * c_XYZ2sRGB_D65f[0] + src.y * c_XYZ2sRGB_D65f[1] + src.z * c_XYZ2sRGB_D65f[2]; + dst[1] = src.x * c_XYZ2sRGB_D65f[3] + src.y * c_XYZ2sRGB_D65f[4] + src.z * c_XYZ2sRGB_D65f[5]; + dst[bidx] = src.x * c_XYZ2sRGB_D65f[6] + src.y * c_XYZ2sRGB_D65f[7] + src.z * c_XYZ2sRGB_D65f[8]; + } + + template struct XYZ2RGB + : unary_function::vec_type, typename TypeVec::vec_type> + { + __device__ __forceinline__ typename TypeVec::vec_type operator()(const typename TypeVec::vec_type& src) const + { + typename TypeVec::vec_type dst; + + XYZ2RGBConvert(src, &dst.x); + setAlpha(dst, ColorChannel::max()); + + return dst; + } + __host__ __device__ __forceinline__ XYZ2RGB() {} + __host__ __device__ __forceinline__ XYZ2RGB(const XYZ2RGB&) {} + }; + + template struct XYZ2RGB : unary_function + { + __device__ __forceinline__ uint operator()(uint src) const + { + return XYZ2RGBConvert(src); + } + __host__ __device__ __forceinline__ XYZ2RGB() {} + __host__ __device__ __forceinline__ XYZ2RGB(const XYZ2RGB&) {} + }; + } + +#define OPENCV_CUDA_IMPLEMENT_XYZ2RGB_TRAITS(name, scn, dcn, bidx) \ + template struct name ## _traits \ + { \ + typedef ::cv::cuda::device::color_detail::XYZ2RGB functor_type; \ + static __host__ __device__ __forceinline__ functor_type create_functor() \ + { \ + return functor_type(); \ + } \ + }; + +////////////////////////////////////// RGB <-> HSV /////////////////////////////////////// + + namespace color_detail + { + __constant__ int c_HsvDivTable [256] = {0, 1044480, 522240, 348160, 261120, 208896, 174080, 149211, 130560, 116053, 104448, 94953, 87040, 80345, 74606, 69632, 65280, 61440, 58027, 54973, 52224, 49737, 47476, 45412, 43520, 41779, 40172, 38684, 37303, 36017, 34816, 33693, 32640, 31651, 30720, 29842, 29013, 28229, 27486, 26782, 26112, 25475, 24869, 24290, 23738, 23211, 22706, 22223, 21760, 21316, 20890, 20480, 20086, 19707, 19342, 18991, 18651, 18324, 18008, 17703, 17408, 17123, 16846, 16579, 16320, 16069, 15825, 15589, 15360, 15137, 14921, 14711, 14507, 14308, 14115, 13926, 13743, 13565, 13391, 13221, 13056, 12895, 12738, 12584, 12434, 12288, 12145, 12006, 11869, 11736, 11605, 11478, 11353, 11231, 11111, 10995, 10880, 10768, 10658, 10550, 10445, 10341, 10240, 10141, 10043, 9947, 9854, 9761, 9671, 9582, 9495, 9410, 9326, 9243, 9162, 9082, 9004, 8927, 8852, 8777, 8704, 8632, 8561, 8492, 8423, 8356, 8290, 8224, 8160, 8097, 8034, 7973, 7913, 7853, 7795, 7737, 7680, 7624, 7569, 7514, 7461, 7408, 7355, 7304, 7253, 7203, 7154, 7105, 7057, 7010, 6963, 6917, 6872, 6827, 6782, 6739, 6695, 6653, 6611, 6569, 6528, 6487, 6447, 6408, 6369, 6330, 6292, 6254, 6217, 6180, 6144, 6108, 6073, 6037, 6003, 5968, 5935, 5901, 5868, 5835, 5803, 5771, 5739, 5708, 5677, 5646, 5615, 5585, 5556, 5526, 5497, 5468, 5440, 5412, 5384, 5356, 5329, 5302, 5275, 5249, 5222, 5196, 5171, 5145, 5120, 5095, 5070, 5046, 5022, 4998, 4974, 4950, 4927, 4904, 4881, 4858, 4836, 4813, 4791, 4769, 4748, 4726, 4705, 4684, 4663, 4642, 4622, 4601, 4581, 4561, 4541, 4522, 4502, 4483, 4464, 4445, 4426, 4407, 4389, 4370, 4352, 4334, 4316, 4298, 4281, 4263, 4246, 4229, 4212, 4195, 4178, 4161, 4145, 4128, 4112, 4096}; + __constant__ int c_HsvDivTable180[256] = {0, 122880, 61440, 40960, 30720, 24576, 20480, 17554, 15360, 13653, 12288, 11171, 10240, 9452, 8777, 8192, 7680, 7228, 6827, 6467, 6144, 5851, 5585, 5343, 5120, 4915, 4726, 4551, 4389, 4237, 4096, 3964, 3840, 3724, 3614, 3511, 3413, 3321, 3234, 3151, 3072, 2997, 2926, 2858, 2793, 2731, 2671, 2614, 2560, 2508, 2458, 2409, 2363, 2318, 2276, 2234, 2194, 2156, 2119, 2083, 2048, 2014, 1982, 1950, 1920, 1890, 1862, 1834, 1807, 1781, 1755, 1731, 1707, 1683, 1661, 1638, 1617, 1596, 1575, 1555, 1536, 1517, 1499, 1480, 1463, 1446, 1429, 1412, 1396, 1381, 1365, 1350, 1336, 1321, 1307, 1293, 1280, 1267, 1254, 1241, 1229, 1217, 1205, 1193, 1182, 1170, 1159, 1148, 1138, 1127, 1117, 1107, 1097, 1087, 1078, 1069, 1059, 1050, 1041, 1033, 1024, 1016, 1007, 999, 991, 983, 975, 968, 960, 953, 945, 938, 931, 924, 917, 910, 904, 897, 890, 884, 878, 871, 865, 859, 853, 847, 842, 836, 830, 825, 819, 814, 808, 803, 798, 793, 788, 783, 778, 773, 768, 763, 759, 754, 749, 745, 740, 736, 731, 727, 723, 719, 714, 710, 706, 702, 698, 694, 690, 686, 683, 679, 675, 671, 668, 664, 661, 657, 654, 650, 647, 643, 640, 637, 633, 630, 627, 624, 621, 617, 614, 611, 608, 605, 602, 599, 597, 594, 591, 588, 585, 582, 580, 577, 574, 572, 569, 566, 564, 561, 559, 556, 554, 551, 549, 546, 544, 541, 539, 537, 534, 532, 530, 527, 525, 523, 521, 518, 516, 514, 512, 510, 508, 506, 504, 502, 500, 497, 495, 493, 492, 490, 488, 486, 484, 482}; + __constant__ int c_HsvDivTable256[256] = {0, 174763, 87381, 58254, 43691, 34953, 29127, 24966, 21845, 19418, 17476, 15888, 14564, 13443, 12483, 11651, 10923, 10280, 9709, 9198, 8738, 8322, 7944, 7598, 7282, 6991, 6722, 6473, 6242, 6026, 5825, 5638, 5461, 5296, 5140, 4993, 4855, 4723, 4599, 4481, 4369, 4263, 4161, 4064, 3972, 3884, 3799, 3718, 3641, 3567, 3495, 3427, 3361, 3297, 3236, 3178, 3121, 3066, 3013, 2962, 2913, 2865, 2819, 2774, 2731, 2689, 2648, 2608, 2570, 2533, 2497, 2461, 2427, 2394, 2362, 2330, 2300, 2270, 2241, 2212, 2185, 2158, 2131, 2106, 2081, 2056, 2032, 2009, 1986, 1964, 1942, 1920, 1900, 1879, 1859, 1840, 1820, 1802, 1783, 1765, 1748, 1730, 1713, 1697, 1680, 1664, 1649, 1633, 1618, 1603, 1589, 1574, 1560, 1547, 1533, 1520, 1507, 1494, 1481, 1469, 1456, 1444, 1432, 1421, 1409, 1398, 1387, 1376, 1365, 1355, 1344, 1334, 1324, 1314, 1304, 1295, 1285, 1276, 1266, 1257, 1248, 1239, 1231, 1222, 1214, 1205, 1197, 1189, 1181, 1173, 1165, 1157, 1150, 1142, 1135, 1128, 1120, 1113, 1106, 1099, 1092, 1085, 1079, 1072, 1066, 1059, 1053, 1046, 1040, 1034, 1028, 1022, 1016, 1010, 1004, 999, 993, 987, 982, 976, 971, 966, 960, 955, 950, 945, 940, 935, 930, 925, 920, 915, 910, 906, 901, 896, 892, 887, 883, 878, 874, 869, 865, 861, 857, 853, 848, 844, 840, 836, 832, 828, 824, 820, 817, 813, 809, 805, 802, 798, 794, 791, 787, 784, 780, 777, 773, 770, 767, 763, 760, 757, 753, 750, 747, 744, 741, 737, 734, 731, 728, 725, 722, 719, 716, 713, 710, 708, 705, 702, 699, 696, 694, 691, 688, 685}; + + template static __device__ void RGB2HSVConvert(const uchar* src, D& dst) + { + const int hsv_shift = 12; + const int* hdiv_table = hr == 180 ? c_HsvDivTable180 : c_HsvDivTable256; + + int b = src[bidx], g = src[1], r = src[bidx^2]; + int h, s, v = b; + int vmin = b, diff; + int vr, vg; + + v = ::max(v, g); + v = ::max(v, r); + vmin = ::min(vmin, g); + vmin = ::min(vmin, r); + + diff = v - vmin; + vr = (v == r) * -1; + vg = (v == g) * -1; + + s = (diff * c_HsvDivTable[v] + (1 << (hsv_shift-1))) >> hsv_shift; + h = (vr & (g - b)) + (~vr & ((vg & (b - r + 2 * diff)) + ((~vg) & (r - g + 4 * diff)))); + h = (h * hdiv_table[diff] + (1 << (hsv_shift-1))) >> hsv_shift; + h += (h < 0) * hr; + + dst.x = saturate_cast(h); + dst.y = (uchar)s; + dst.z = (uchar)v; + } + + template static __device__ uint RGB2HSVConvert(uint src) + { + const int hsv_shift = 12; + const int* hdiv_table = hr == 180 ? c_HsvDivTable180 : c_HsvDivTable256; + + const int b = 0xff & (src >> (bidx * 8)); + const int g = 0xff & (src >> 8); + const int r = 0xff & (src >> ((bidx ^ 2) * 8)); + + int h, s, v = b; + int vmin = b, diff; + int vr, vg; + + v = ::max(v, g); + v = ::max(v, r); + vmin = ::min(vmin, g); + vmin = ::min(vmin, r); + + diff = v - vmin; + vr = (v == r) * -1; + vg = (v == g) * -1; + + s = (diff * c_HsvDivTable[v] + (1 << (hsv_shift-1))) >> hsv_shift; + h = (vr & (g - b)) + (~vr & ((vg & (b - r + 2 * diff)) + ((~vg) & (r - g + 4 * diff)))); + h = (h * hdiv_table[diff] + (1 << (hsv_shift-1))) >> hsv_shift; + h += (h < 0) * hr; + + uint dst = 0; + + dst |= saturate_cast(h); + dst |= (0xffu & s) << 8; + dst |= (0xffu & v) << 16; + + return dst; + } + + template static __device__ void RGB2HSVConvert(const float* src, D& dst) + { + const float hscale = hr * (1.f / 360.f); + + float b = src[bidx], g = src[1], r = src[bidx^2]; + float h, s, v; + + float vmin, diff; + + v = vmin = r; + v = fmax(v, g); + v = fmax(v, b); + vmin = fmin(vmin, g); + vmin = fmin(vmin, b); + + diff = v - vmin; + s = diff / (float)(::fabs(v) + numeric_limits::epsilon()); + diff = (float)(60. / (diff + numeric_limits::epsilon())); + + h = (v == r) * (g - b) * diff; + h += (v != r && v == g) * ((b - r) * diff + 120.f); + h += (v != r && v != g) * ((r - g) * diff + 240.f); + h += (h < 0) * 360.f; + + dst.x = h * hscale; + dst.y = s; + dst.z = v; + } + + template struct RGB2HSV + : unary_function::vec_type, typename TypeVec::vec_type> + { + __device__ __forceinline__ typename TypeVec::vec_type operator()(const typename TypeVec::vec_type& src) const + { + typename TypeVec::vec_type dst; + + RGB2HSVConvert(&src.x, dst); + + return dst; + } + __host__ __device__ __forceinline__ RGB2HSV() {} + __host__ __device__ __forceinline__ RGB2HSV(const RGB2HSV&) {} + }; + + template struct RGB2HSV : unary_function + { + __device__ __forceinline__ uint operator()(uint src) const + { + return RGB2HSVConvert(src); + } + __host__ __device__ __forceinline__ RGB2HSV() {} + __host__ __device__ __forceinline__ RGB2HSV(const RGB2HSV&) {} + }; + } + +#define OPENCV_CUDA_IMPLEMENT_RGB2HSV_TRAITS(name, scn, dcn, bidx) \ + template struct name ## _traits \ + { \ + typedef ::cv::cuda::device::color_detail::RGB2HSV functor_type; \ + static __host__ __device__ __forceinline__ functor_type create_functor() \ + { \ + return functor_type(); \ + } \ + }; \ + template struct name ## _full_traits \ + { \ + typedef ::cv::cuda::device::color_detail::RGB2HSV functor_type; \ + static __host__ __device__ __forceinline__ functor_type create_functor() \ + { \ + return functor_type(); \ + } \ + }; \ + template <> struct name ## _traits \ + { \ + typedef ::cv::cuda::device::color_detail::RGB2HSV functor_type; \ + static __host__ __device__ __forceinline__ functor_type create_functor() \ + { \ + return functor_type(); \ + } \ + }; \ + template <> struct name ## _full_traits \ + { \ + typedef ::cv::cuda::device::color_detail::RGB2HSV functor_type; \ + static __host__ __device__ __forceinline__ functor_type create_functor() \ + { \ + return functor_type(); \ + } \ + }; + + namespace color_detail + { + __constant__ int c_HsvSectorData[6][3] = { {1,3,0}, {1,0,2}, {3,0,1}, {0,2,1}, {0,1,3}, {2,1,0} }; + + template static __device__ void HSV2RGBConvert(const T& src, float* dst) + { + const float hscale = 6.f / hr; + + float h = src.x, s = src.y, v = src.z; + float b = v, g = v, r = v; + + if (s != 0) + { + h *= hscale; + + if( h < 0 ) + do h += 6; while( h < 0 ); + else if( h >= 6 ) + do h -= 6; while( h >= 6 ); + + int sector = __float2int_rd(h); + h -= sector; + + if ( (unsigned)sector >= 6u ) + { + sector = 0; + h = 0.f; + } + + float tab[4]; + tab[0] = v; + tab[1] = v * (1.f - s); + tab[2] = v * (1.f - s * h); + tab[3] = v * (1.f - s * (1.f - h)); + + b = tab[c_HsvSectorData[sector][0]]; + g = tab[c_HsvSectorData[sector][1]]; + r = tab[c_HsvSectorData[sector][2]]; + } + + dst[bidx] = b; + dst[1] = g; + dst[bidx^2] = r; + } + + template static __device__ void HSV2RGBConvert(const T& src, uchar* dst) + { + float3 buf; + + buf.x = src.x; + buf.y = src.y * (1.f / 255.f); + buf.z = src.z * (1.f / 255.f); + + HSV2RGBConvert(buf, &buf.x); + + dst[0] = saturate_cast(buf.x * 255.f); + dst[1] = saturate_cast(buf.y * 255.f); + dst[2] = saturate_cast(buf.z * 255.f); + } + + template static __device__ uint HSV2RGBConvert(uint src) + { + float3 buf; + + buf.x = src & 0xff; + buf.y = ((src >> 8) & 0xff) * (1.f/255.f); + buf.z = ((src >> 16) & 0xff) * (1.f/255.f); + + HSV2RGBConvert(buf, &buf.x); + + uint dst = 0xffu << 24; + + dst |= saturate_cast(buf.x * 255.f); + dst |= saturate_cast(buf.y * 255.f) << 8; + dst |= saturate_cast(buf.z * 255.f) << 16; + + return dst; + } + + template struct HSV2RGB + : unary_function::vec_type, typename TypeVec::vec_type> + { + __device__ __forceinline__ typename TypeVec::vec_type operator()(const typename TypeVec::vec_type& src) const + { + typename TypeVec::vec_type dst; + + HSV2RGBConvert(src, &dst.x); + setAlpha(dst, ColorChannel::max()); + + return dst; + } + __host__ __device__ __forceinline__ HSV2RGB() {} + __host__ __device__ __forceinline__ HSV2RGB(const HSV2RGB&) {} + }; + + template struct HSV2RGB : unary_function + { + __device__ __forceinline__ uint operator()(uint src) const + { + return HSV2RGBConvert(src); + } + __host__ __device__ __forceinline__ HSV2RGB() {} + __host__ __device__ __forceinline__ HSV2RGB(const HSV2RGB&) {} + }; + } + +#define OPENCV_CUDA_IMPLEMENT_HSV2RGB_TRAITS(name, scn, dcn, bidx) \ + template struct name ## _traits \ + { \ + typedef ::cv::cuda::device::color_detail::HSV2RGB functor_type; \ + static __host__ __device__ __forceinline__ functor_type create_functor() \ + { \ + return functor_type(); \ + } \ + }; \ + template struct name ## _full_traits \ + { \ + typedef ::cv::cuda::device::color_detail::HSV2RGB functor_type; \ + static __host__ __device__ __forceinline__ functor_type create_functor() \ + { \ + return functor_type(); \ + } \ + }; \ + template <> struct name ## _traits \ + { \ + typedef ::cv::cuda::device::color_detail::HSV2RGB functor_type; \ + static __host__ __device__ __forceinline__ functor_type create_functor() \ + { \ + return functor_type(); \ + } \ + }; \ + template <> struct name ## _full_traits \ + { \ + typedef ::cv::cuda::device::color_detail::HSV2RGB functor_type; \ + static __host__ __device__ __forceinline__ functor_type create_functor() \ + { \ + return functor_type(); \ + } \ + }; + +/////////////////////////////////////// RGB <-> HLS //////////////////////////////////////// + + namespace color_detail + { + template static __device__ void RGB2HLSConvert(const float* src, D& dst) + { + const float hscale = hr * (1.f / 360.f); + + float b = src[bidx], g = src[1], r = src[bidx^2]; + float h = 0.f, s = 0.f, l; + float vmin, vmax, diff; + + vmax = vmin = r; + vmax = fmax(vmax, g); + vmax = fmax(vmax, b); + vmin = fmin(vmin, g); + vmin = fmin(vmin, b); + + diff = vmax - vmin; + l = (vmax + vmin) * 0.5f; + + if (diff > numeric_limits::epsilon()) + { + s = (l < 0.5f) * diff / (vmax + vmin); + s += (l >= 0.5f) * diff / (2.0f - vmax - vmin); + + diff = 60.f / diff; + + h = (vmax == r) * (g - b) * diff; + h += (vmax != r && vmax == g) * ((b - r) * diff + 120.f); + h += (vmax != r && vmax != g) * ((r - g) * diff + 240.f); + h += (h < 0.f) * 360.f; + } + + dst.x = h * hscale; + dst.y = l; + dst.z = s; + } + + template static __device__ void RGB2HLSConvert(const uchar* src, D& dst) + { + float3 buf; + + buf.x = src[0] * (1.f / 255.f); + buf.y = src[1] * (1.f / 255.f); + buf.z = src[2] * (1.f / 255.f); + + RGB2HLSConvert(&buf.x, buf); + + dst.x = saturate_cast(buf.x); + dst.y = saturate_cast(buf.y*255.f); + dst.z = saturate_cast(buf.z*255.f); + } + + template static __device__ uint RGB2HLSConvert(uint src) + { + float3 buf; + + buf.x = (0xff & src) * (1.f / 255.f); + buf.y = (0xff & (src >> 8)) * (1.f / 255.f); + buf.z = (0xff & (src >> 16)) * (1.f / 255.f); + + RGB2HLSConvert(&buf.x, buf); + + uint dst = 0xffu << 24; + + dst |= saturate_cast(buf.x); + dst |= saturate_cast(buf.y * 255.f) << 8; + dst |= saturate_cast(buf.z * 255.f) << 16; + + return dst; + } + + template struct RGB2HLS + : unary_function::vec_type, typename TypeVec::vec_type> + { + __device__ __forceinline__ typename TypeVec::vec_type operator()(const typename TypeVec::vec_type& src) const + { + typename TypeVec::vec_type dst; + + RGB2HLSConvert(&src.x, dst); + + return dst; + } + __host__ __device__ __forceinline__ RGB2HLS() {} + __host__ __device__ __forceinline__ RGB2HLS(const RGB2HLS&) {} + }; + + template struct RGB2HLS : unary_function + { + __device__ __forceinline__ uint operator()(uint src) const + { + return RGB2HLSConvert(src); + } + __host__ __device__ __forceinline__ RGB2HLS() {} + __host__ __device__ __forceinline__ RGB2HLS(const RGB2HLS&) {} + }; + } + +#define OPENCV_CUDA_IMPLEMENT_RGB2HLS_TRAITS(name, scn, dcn, bidx) \ + template struct name ## _traits \ + { \ + typedef ::cv::cuda::device::color_detail::RGB2HLS functor_type; \ + static __host__ __device__ __forceinline__ functor_type create_functor() \ + { \ + return functor_type(); \ + } \ + }; \ + template struct name ## _full_traits \ + { \ + typedef ::cv::cuda::device::color_detail::RGB2HLS functor_type; \ + static __host__ __device__ __forceinline__ functor_type create_functor() \ + { \ + return functor_type(); \ + } \ + }; \ + template <> struct name ## _traits \ + { \ + typedef ::cv::cuda::device::color_detail::RGB2HLS functor_type; \ + static __host__ __device__ __forceinline__ functor_type create_functor() \ + { \ + return functor_type(); \ + } \ + }; \ + template <> struct name ## _full_traits \ + { \ + typedef ::cv::cuda::device::color_detail::RGB2HLS functor_type; \ + static __host__ __device__ __forceinline__ functor_type create_functor() \ + { \ + return functor_type(); \ + } \ + }; + + namespace color_detail + { + __constant__ int c_HlsSectorData[6][3] = { {1,3,0}, {1,0,2}, {3,0,1}, {0,2,1}, {0,1,3}, {2,1,0} }; + + template static __device__ void HLS2RGBConvert(const T& src, float* dst) + { + const float hscale = 6.0f / hr; + + float h = src.x, l = src.y, s = src.z; + float b = l, g = l, r = l; + + if (s != 0) + { + float p2 = (l <= 0.5f) * l * (1 + s); + p2 += (l > 0.5f) * (l + s - l * s); + float p1 = 2 * l - p2; + + h *= hscale; + + if( h < 0 ) + do h += 6; while( h < 0 ); + else if( h >= 6 ) + do h -= 6; while( h >= 6 ); + + int sector; + sector = __float2int_rd(h); + + h -= sector; + + float tab[4]; + tab[0] = p2; + tab[1] = p1; + tab[2] = p1 + (p2 - p1) * (1 - h); + tab[3] = p1 + (p2 - p1) * h; + + b = tab[c_HlsSectorData[sector][0]]; + g = tab[c_HlsSectorData[sector][1]]; + r = tab[c_HlsSectorData[sector][2]]; + } + + dst[bidx] = b; + dst[1] = g; + dst[bidx^2] = r; + } + + template static __device__ void HLS2RGBConvert(const T& src, uchar* dst) + { + float3 buf; + + buf.x = src.x; + buf.y = src.y * (1.f / 255.f); + buf.z = src.z * (1.f / 255.f); + + HLS2RGBConvert(buf, &buf.x); + + dst[0] = saturate_cast(buf.x * 255.f); + dst[1] = saturate_cast(buf.y * 255.f); + dst[2] = saturate_cast(buf.z * 255.f); + } + + template static __device__ uint HLS2RGBConvert(uint src) + { + float3 buf; + + buf.x = 0xff & src; + buf.y = (0xff & (src >> 8)) * (1.f / 255.f); + buf.z = (0xff & (src >> 16)) * (1.f / 255.f); + + HLS2RGBConvert(buf, &buf.x); + + uint dst = 0xffu << 24; + + dst |= saturate_cast(buf.x * 255.f); + dst |= saturate_cast(buf.y * 255.f) << 8; + dst |= saturate_cast(buf.z * 255.f) << 16; + + return dst; + } + + template struct HLS2RGB + : unary_function::vec_type, typename TypeVec::vec_type> + { + __device__ __forceinline__ typename TypeVec::vec_type operator()(const typename TypeVec::vec_type& src) const + { + typename TypeVec::vec_type dst; + + HLS2RGBConvert(src, &dst.x); + setAlpha(dst, ColorChannel::max()); + + return dst; + } + __host__ __device__ __forceinline__ HLS2RGB() {} + __host__ __device__ __forceinline__ HLS2RGB(const HLS2RGB&) {} + }; + + template struct HLS2RGB : unary_function + { + __device__ __forceinline__ uint operator()(uint src) const + { + return HLS2RGBConvert(src); + } + __host__ __device__ __forceinline__ HLS2RGB() {} + __host__ __device__ __forceinline__ HLS2RGB(const HLS2RGB&) {} + }; + } + +#define OPENCV_CUDA_IMPLEMENT_HLS2RGB_TRAITS(name, scn, dcn, bidx) \ + template struct name ## _traits \ + { \ + typedef ::cv::cuda::device::color_detail::HLS2RGB functor_type; \ + static __host__ __device__ __forceinline__ functor_type create_functor() \ + { \ + return functor_type(); \ + } \ + }; \ + template struct name ## _full_traits \ + { \ + typedef ::cv::cuda::device::color_detail::HLS2RGB functor_type; \ + static __host__ __device__ __forceinline__ functor_type create_functor() \ + { \ + return functor_type(); \ + } \ + }; \ + template <> struct name ## _traits \ + { \ + typedef ::cv::cuda::device::color_detail::HLS2RGB functor_type; \ + static __host__ __device__ __forceinline__ functor_type create_functor() \ + { \ + return functor_type(); \ + } \ + }; \ + template <> struct name ## _full_traits \ + { \ + typedef ::cv::cuda::device::color_detail::HLS2RGB functor_type; \ + static __host__ __device__ __forceinline__ functor_type create_functor() \ + { \ + return functor_type(); \ + } \ + }; + +///////////////////////////////////// RGB <-> Lab ///////////////////////////////////// + + namespace color_detail + { + enum + { + LAB_CBRT_TAB_SIZE = 1024, + GAMMA_TAB_SIZE = 1024, + lab_shift = xyz_shift, + gamma_shift = 3, + lab_shift2 = (lab_shift + gamma_shift), + LAB_CBRT_TAB_SIZE_B = (256 * 3 / 2 * (1 << gamma_shift)) + }; + + __constant__ ushort c_sRGBGammaTab_b[] = {0,1,1,2,2,3,4,4,5,6,6,7,8,8,9,10,11,11,12,13,14,15,16,17,19,20,21,22,24,25,26,28,29,31,33,34,36,38,40,41,43,45,47,49,51,54,56,58,60,63,65,68,70,73,75,78,81,83,86,89,92,95,98,101,105,108,111,115,118,121,125,129,132,136,140,144,147,151,155,160,164,168,172,176,181,185,190,194,199,204,209,213,218,223,228,233,239,244,249,255,260,265,271,277,282,288,294,300,306,312,318,324,331,337,343,350,356,363,370,376,383,390,397,404,411,418,426,433,440,448,455,463,471,478,486,494,502,510,518,527,535,543,552,560,569,578,586,595,604,613,622,631,641,650,659,669,678,688,698,707,717,727,737,747,757,768,778,788,799,809,820,831,842,852,863,875,886,897,908,920,931,943,954,966,978,990,1002,1014,1026,1038,1050,1063,1075,1088,1101,1113,1126,1139,1152,1165,1178,1192,1205,1218,1232,1245,1259,1273,1287,1301,1315,1329,1343,1357,1372,1386,1401,1415,1430,1445,1460,1475,1490,1505,1521,1536,1551,1567,1583,1598,1614,1630,1646,1662,1678,1695,1711,1728,1744,1761,1778,1794,1811,1828,1846,1863,1880,1897,1915,1933,1950,1968,1986,2004,2022,2040}; + + __device__ __forceinline__ int LabCbrt_b(int i) + { + float x = i * (1.f / (255.f * (1 << gamma_shift))); + return (1 << lab_shift2) * (x < 0.008856f ? x * 7.787f + 0.13793103448275862f : ::cbrtf(x)); + } + + template + __device__ __forceinline__ void RGB2LabConvert_b(const T& src, D& dst) + { + const int Lscale = (116 * 255 + 50) / 100; + const int Lshift = -((16 * 255 * (1 << lab_shift2) + 50) / 100); + + int B = blueIdx == 0 ? src.x : src.z; + int G = src.y; + int R = blueIdx == 0 ? src.z : src.x; + + if (srgb) + { + B = c_sRGBGammaTab_b[B]; + G = c_sRGBGammaTab_b[G]; + R = c_sRGBGammaTab_b[R]; + } + else + { + B <<= 3; + G <<= 3; + R <<= 3; + } + + int fX = LabCbrt_b(CV_DESCALE(B * 778 + G * 1541 + R * 1777, lab_shift)); + int fY = LabCbrt_b(CV_DESCALE(B * 296 + G * 2929 + R * 871, lab_shift)); + int fZ = LabCbrt_b(CV_DESCALE(B * 3575 + G * 448 + R * 73, lab_shift)); + + int L = CV_DESCALE(Lscale * fY + Lshift, lab_shift2); + int a = CV_DESCALE(500 * (fX - fY) + 128 * (1 << lab_shift2), lab_shift2); + int b = CV_DESCALE(200 * (fY - fZ) + 128 * (1 << lab_shift2), lab_shift2); + + dst.x = saturate_cast(L); + dst.y = saturate_cast(a); + dst.z = saturate_cast(b); + } + + __device__ __forceinline__ float splineInterpolate(float x, const float* tab, int n) + { + int ix = ::min(::max(int(x), 0), n-1); + x -= ix; + tab += ix * 4; + return ((tab[3] * x + tab[2]) * x + tab[1]) * x + tab[0]; + } + + __constant__ float c_sRGBGammaTab[] = {0,7.55853e-05,0.,-7.51331e-13,7.55853e-05,7.55853e-05,-2.25399e-12,3.75665e-12,0.000151171,7.55853e-05,9.01597e-12,-6.99932e-12,0.000226756,7.55853e-05,-1.1982e-11,2.41277e-12,0.000302341,7.55853e-05,-4.74369e-12,1.19001e-11,0.000377927,7.55853e-05,3.09568e-11,-2.09095e-11,0.000453512,7.55853e-05,-3.17718e-11,1.35303e-11,0.000529097,7.55853e-05,8.81905e-12,-4.10782e-12,0.000604683,7.55853e-05,-3.50439e-12,2.90097e-12,0.000680268,7.55853e-05,5.19852e-12,-7.49607e-12,0.000755853,7.55853e-05,-1.72897e-11,2.70833e-11,0.000831439,7.55854e-05,6.39602e-11,-4.26295e-11,0.000907024,7.55854e-05,-6.39282e-11,2.70193e-11,0.000982609,7.55853e-05,1.71298e-11,-7.24017e-12,0.00105819,7.55853e-05,-4.59077e-12,1.94137e-12,0.00113378,7.55853e-05,1.23333e-12,-5.25291e-13,0.00120937,7.55853e-05,-3.42545e-13,1.59799e-13,0.00128495,7.55853e-05,1.36852e-13,-1.13904e-13,0.00136054,7.55853e-05,-2.04861e-13,2.95818e-13,0.00143612,7.55853e-05,6.82594e-13,-1.06937e-12,0.00151171,7.55853e-05,-2.52551e-12,3.98166e-12,0.00158729,7.55853e-05,9.41946e-12,-1.48573e-11,0.00166288,7.55853e-05,-3.51523e-11,5.54474e-11,0.00173846,7.55854e-05,1.3119e-10,-9.0517e-11,0.00181405,7.55854e-05,-1.40361e-10,7.37899e-11,0.00188963,7.55853e-05,8.10085e-11,-8.82272e-11,0.00196522,7.55852e-05,-1.83673e-10,1.62704e-10,0.0020408,7.55853e-05,3.04438e-10,-2.13341e-10,0.00211639,7.55853e-05,-3.35586e-10,2.25e-10,0.00219197,7.55853e-05,3.39414e-10,-2.20997e-10,0.00226756,7.55853e-05,-3.23576e-10,1.93326e-10,0.00234315,7.55853e-05,2.564e-10,-8.66446e-11,0.00241873,7.55855e-05,-3.53328e-12,-7.9578e-11,0.00249432,7.55853e-05,-2.42267e-10,1.72126e-10,0.0025699,7.55853e-05,2.74111e-10,-1.43265e-10,0.00264549,7.55854e-05,-1.55683e-10,-6.47292e-11,0.00272107,7.55849e-05,-3.4987e-10,8.67842e-10,0.00279666,7.55868e-05,2.25366e-09,-3.8723e-09,0.00287224,7.55797e-05,-9.36325e-09,1.5087e-08,0.00294783,7.56063e-05,3.58978e-08,-5.69415e-08,0.00302341,7.55072e-05,-1.34927e-07,2.13144e-07,0.003099,7.58768e-05,5.04507e-07,1.38713e-07,0.00317552,7.7302e-05,9.20646e-07,-1.55186e-07,0.00325359,7.86777e-05,4.55087e-07,4.26813e-08,0.00333276,7.97159e-05,5.83131e-07,-1.06495e-08,0.00341305,8.08502e-05,5.51182e-07,3.87467e-09,0.00349446,8.19642e-05,5.62806e-07,-1.92586e-10,0.00357698,8.30892e-05,5.62228e-07,1.0866e-09,0.00366063,8.4217e-05,5.65488e-07,5.02818e-10,0.00374542,8.53494e-05,5.66997e-07,8.60211e-10,0.00383133,8.6486e-05,5.69577e-07,7.13044e-10,0.00391839,8.76273e-05,5.71716e-07,4.78527e-10,0.00400659,8.87722e-05,5.73152e-07,1.09818e-09,0.00409594,8.99218e-05,5.76447e-07,2.50964e-10,0.00418644,9.10754e-05,5.772e-07,1.15762e-09,0.00427809,9.22333e-05,5.80672e-07,2.40865e-10,0.0043709,9.33954e-05,5.81395e-07,1.13854e-09,0.00446488,9.45616e-05,5.84811e-07,3.27267e-10,0.00456003,9.57322e-05,5.85792e-07,8.1197e-10,0.00465635,9.69062e-05,5.88228e-07,6.15823e-10,0.00475384,9.80845e-05,5.90076e-07,9.15747e-10,0.00485252,9.92674e-05,5.92823e-07,3.778e-10,0.00495238,0.000100454,5.93956e-07,8.32623e-10,0.00505343,0.000101645,5.96454e-07,4.82695e-10,0.00515567,0.000102839,5.97902e-07,9.61904e-10,0.00525911,0.000104038,6.00788e-07,3.26281e-10,0.00536375,0.00010524,6.01767e-07,9.926e-10,0.00546959,0.000106447,6.04745e-07,3.59933e-10,0.00557664,0.000107657,6.05824e-07,8.2728e-10,0.0056849,0.000108871,6.08306e-07,5.21898e-10,0.00579438,0.00011009,6.09872e-07,8.10492e-10,0.00590508,0.000111312,6.12303e-07,4.27046e-10,0.00601701,0.000112538,6.13585e-07,7.40878e-10,0.00613016,0.000113767,6.15807e-07,8.00469e-10,0.00624454,0.000115001,6.18209e-07,2.48178e-10,0.00636016,0.000116238,6.18953e-07,1.00073e-09,0.00647702,0.000117479,6.21955e-07,4.05654e-10,0.00659512,0.000118724,6.23172e-07,6.36192e-10,0.00671447,0.000119973,6.25081e-07,7.74927e-10,0.00683507,0.000121225,6.27406e-07,4.54975e-10,0.00695692,0.000122481,6.28771e-07,6.64841e-10,0.00708003,0.000123741,6.30765e-07,6.10972e-10,0.00720441,0.000125004,6.32598e-07,6.16543e-10,0.00733004,0.000126271,6.34448e-07,6.48204e-10,0.00745695,0.000127542,6.36392e-07,5.15835e-10,0.00758513,0.000128816,6.3794e-07,5.48103e-10,0.00771458,0.000130094,6.39584e-07,1.01706e-09,0.00784532,0.000131376,6.42635e-07,4.0283e-11,0.00797734,0.000132661,6.42756e-07,6.84471e-10,0.00811064,0.000133949,6.4481e-07,9.47144e-10,0.00824524,0.000135241,6.47651e-07,1.83472e-10,0.00838112,0.000136537,6.48201e-07,1.11296e-09,0.00851831,0.000137837,6.5154e-07,2.13163e-11,0.0086568,0.00013914,6.51604e-07,6.64462e-10,0.00879659,0.000140445,6.53598e-07,1.04613e-09,0.00893769,0.000141756,6.56736e-07,-1.92377e-10,0.0090801,0.000143069,6.56159e-07,1.58601e-09,0.00922383,0.000144386,6.60917e-07,-5.63754e-10,0.00936888,0.000145706,6.59226e-07,1.60033e-09,0.00951524,0.000147029,6.64027e-07,-2.49543e-10,0.00966294,0.000148356,6.63278e-07,1.26043e-09,0.00981196,0.000149687,6.67059e-07,-1.35572e-10,0.00996231,0.00015102,6.66653e-07,1.14458e-09,0.010114,0.000152357,6.70086e-07,2.13864e-10,0.010267,0.000153698,6.70728e-07,7.93856e-10,0.0104214,0.000155042,6.73109e-07,3.36077e-10,0.0105771,0.000156389,6.74118e-07,6.55765e-10,0.0107342,0.000157739,6.76085e-07,7.66211e-10,0.0108926,0.000159094,6.78384e-07,4.66116e-12,0.0110524,0.000160451,6.78398e-07,1.07775e-09,0.0112135,0.000161811,6.81631e-07,3.41023e-10,0.011376,0.000163175,6.82654e-07,3.5205e-10,0.0115398,0.000164541,6.8371e-07,1.04473e-09,0.0117051,0.000165912,6.86844e-07,1.25757e-10,0.0118717,0.000167286,6.87222e-07,3.14818e-10,0.0120396,0.000168661,6.88166e-07,1.40886e-09,0.012209,0.000170042,6.92393e-07,-3.62244e-10,0.0123797,0.000171425,6.91306e-07,9.71397e-10,0.0125518,0.000172811,6.9422e-07,2.02003e-10,0.0127253,0.0001742,6.94826e-07,1.01448e-09,0.0129002,0.000175593,6.97869e-07,3.96653e-10,0.0130765,0.00017699,6.99059e-07,1.92927e-10,0.0132542,0.000178388,6.99638e-07,6.94305e-10,0.0134333,0.00017979,7.01721e-07,7.55108e-10,0.0136138,0.000181195,7.03986e-07,1.05918e-11,0.0137957,0.000182603,7.04018e-07,1.06513e-09,0.013979,0.000184015,7.07214e-07,3.85512e-10,0.0141637,0.00018543,7.0837e-07,1.86769e-10,0.0143499,0.000186848,7.0893e-07,7.30116e-10,0.0145374,0.000188268,7.11121e-07,6.17983e-10,0.0147264,0.000189692,7.12975e-07,5.23282e-10,0.0149168,0.000191119,7.14545e-07,8.28398e-11,0.0151087,0.000192549,7.14793e-07,1.0081e-09,0.0153019,0.000193981,7.17817e-07,5.41244e-10,0.0154966,0.000195418,7.19441e-07,-3.7907e-10,0.0156928,0.000196856,7.18304e-07,1.90641e-09,0.0158903,0.000198298,7.24023e-07,-7.27387e-10,0.0160893,0.000199744,7.21841e-07,1.00317e-09,0.0162898,0.000201191,7.24851e-07,4.39949e-10,0.0164917,0.000202642,7.2617e-07,9.6234e-10,0.0166951,0.000204097,7.29057e-07,-5.64019e-10,0.0168999,0.000205554,7.27365e-07,1.29374e-09,0.0171062,0.000207012,7.31247e-07,9.77025e-10,0.017314,0.000208478,7.34178e-07,-1.47651e-09,0.0175232,0.000209942,7.29748e-07,3.06636e-09,0.0177338,0.00021141,7.38947e-07,-1.47573e-09,0.017946,0.000212884,7.3452e-07,9.7386e-10,0.0181596,0.000214356,7.37442e-07,1.30562e-09,0.0183747,0.000215835,7.41358e-07,-6.08376e-10,0.0185913,0.000217315,7.39533e-07,1.12785e-09,0.0188093,0.000218798,7.42917e-07,-1.77711e-10,0.0190289,0.000220283,7.42384e-07,1.44562e-09,0.0192499,0.000221772,7.46721e-07,-1.68825e-11,0.0194724,0.000223266,7.4667e-07,4.84533e-10,0.0196964,0.000224761,7.48124e-07,-5.85298e-11,0.0199219,0.000226257,7.47948e-07,1.61217e-09,0.0201489,0.000227757,7.52785e-07,-8.02136e-10,0.0203775,0.00022926,7.50378e-07,1.59637e-09,0.0206075,0.000230766,7.55167e-07,4.47168e-12,0.020839,0.000232276,7.55181e-07,2.48387e-10,0.021072,0.000233787,7.55926e-07,8.6474e-10,0.0213066,0.000235302,7.5852e-07,1.78299e-11,0.0215426,0.000236819,7.58573e-07,9.26567e-10,0.0217802,0.000238339,7.61353e-07,1.34529e-12,0.0220193,0.000239862,7.61357e-07,9.30659e-10,0.0222599,0.000241387,7.64149e-07,1.34529e-12,0.0225021,0.000242915,7.64153e-07,9.26567e-10,0.0227458,0.000244447,7.66933e-07,1.76215e-11,0.022991,0.00024598,7.66986e-07,8.65536e-10,0.0232377,0.000247517,7.69582e-07,2.45677e-10,0.023486,0.000249057,7.70319e-07,1.44193e-11,0.0237358,0.000250598,7.70363e-07,1.55918e-09,0.0239872,0.000252143,7.7504e-07,-6.63173e-10,0.0242401,0.000253691,7.73051e-07,1.09357e-09,0.0244946,0.000255241,7.76331e-07,1.41919e-11,0.0247506,0.000256793,7.76374e-07,7.12248e-10,0.0250082,0.000258348,7.78511e-07,8.62049e-10,0.0252673,0.000259908,7.81097e-07,-4.35061e-10,0.025528,0.000261469,7.79792e-07,8.7825e-10,0.0257902,0.000263031,7.82426e-07,6.47181e-10,0.0260541,0.000264598,7.84368e-07,2.58448e-10,0.0263194,0.000266167,7.85143e-07,1.81558e-10,0.0265864,0.000267738,7.85688e-07,8.78041e-10,0.0268549,0.000269312,7.88322e-07,3.15102e-11,0.027125,0.000270889,7.88417e-07,8.58525e-10,0.0273967,0.000272468,7.90992e-07,2.59812e-10,0.02767,0.000274051,7.91772e-07,-3.5224e-11,0.0279448,0.000275634,7.91666e-07,1.74377e-09,0.0282212,0.000277223,7.96897e-07,-1.35196e-09,0.0284992,0.000278813,7.92841e-07,1.80141e-09,0.0287788,0.000280404,7.98246e-07,-2.65629e-10,0.0290601,0.000281999,7.97449e-07,1.12374e-09,0.0293428,0.000283598,8.0082e-07,-5.04106e-10,0.0296272,0.000285198,7.99308e-07,8.92764e-10,0.0299132,0.000286799,8.01986e-07,6.58379e-10,0.0302008,0.000288405,8.03961e-07,1.98971e-10,0.0304901,0.000290014,8.04558e-07,4.08382e-10,0.0307809,0.000291624,8.05783e-07,3.01839e-11,0.0310733,0.000293236,8.05874e-07,1.33343e-09,0.0313673,0.000294851,8.09874e-07,2.2419e-10,0.031663,0.000296472,8.10547e-07,-3.67606e-10,0.0319603,0.000298092,8.09444e-07,1.24624e-09,0.0322592,0.000299714,8.13182e-07,-8.92025e-10,0.0325597,0.000301338,8.10506e-07,2.32183e-09,0.0328619,0.000302966,8.17472e-07,-9.44719e-10,0.0331657,0.000304598,8.14638e-07,1.45703e-09,0.0334711,0.000306232,8.19009e-07,-1.15805e-09,0.0337781,0.000307866,8.15535e-07,3.17507e-09,0.0340868,0.000309507,8.2506e-07,-4.09161e-09,0.0343971,0.000311145,8.12785e-07,5.74079e-09,0.0347091,0.000312788,8.30007e-07,-3.97034e-09,0.0350227,0.000314436,8.18096e-07,2.68985e-09,0.035338,0.00031608,8.26166e-07,6.61676e-10,0.0356549,0.000317734,8.28151e-07,-1.61123e-09,0.0359734,0.000319386,8.23317e-07,2.05786e-09,0.0362936,0.000321038,8.29491e-07,8.30388e-10,0.0366155,0.0003227,8.31982e-07,-1.65424e-09,0.036939,0.000324359,8.27019e-07,2.06129e-09,0.0372642,0.000326019,8.33203e-07,8.59719e-10,0.0375911,0.000327688,8.35782e-07,-1.77488e-09,0.0379196,0.000329354,8.30458e-07,2.51464e-09,0.0382498,0.000331023,8.38002e-07,-8.33135e-10,0.0385817,0.000332696,8.35502e-07,8.17825e-10,0.0389152,0.00033437,8.37956e-07,1.28718e-09,0.0392504,0.00033605,8.41817e-07,-2.2413e-09,0.0395873,0.000337727,8.35093e-07,3.95265e-09,0.0399258,0.000339409,8.46951e-07,-2.39332e-09,0.0402661,0.000341095,8.39771e-07,1.89533e-09,0.040608,0.000342781,8.45457e-07,-1.46271e-09,0.0409517,0.000344467,8.41069e-07,3.95554e-09,0.041297,0.000346161,8.52936e-07,-3.18369e-09,0.041644,0.000347857,8.43385e-07,1.32873e-09,0.0419927,0.000349548,8.47371e-07,1.59402e-09,0.0423431,0.000351248,8.52153e-07,-2.54336e-10,0.0426952,0.000352951,8.5139e-07,-5.76676e-10,0.043049,0.000354652,8.4966e-07,2.56114e-09,0.0434045,0.000356359,8.57343e-07,-2.21744e-09,0.0437617,0.000358067,8.50691e-07,2.58344e-09,0.0441206,0.000359776,8.58441e-07,-6.65826e-10,0.0444813,0.000361491,8.56444e-07,7.99218e-11,0.0448436,0.000363204,8.56684e-07,3.46063e-10,0.0452077,0.000364919,8.57722e-07,2.26116e-09,0.0455734,0.000366641,8.64505e-07,-1.94005e-09,0.045941,0.000368364,8.58685e-07,1.77384e-09,0.0463102,0.000370087,8.64007e-07,-1.43005e-09,0.0466811,0.000371811,8.59717e-07,3.94634e-09,0.0470538,0.000373542,8.71556e-07,-3.17946e-09,0.0474282,0.000375276,8.62017e-07,1.32104e-09,0.0478043,0.000377003,8.6598e-07,1.62045e-09,0.0481822,0.00037874,8.70842e-07,-3.52297e-10,0.0485618,0.000380481,8.69785e-07,-2.11211e-10,0.0489432,0.00038222,8.69151e-07,1.19716e-09,0.0493263,0.000383962,8.72743e-07,-8.52026e-10,0.0497111,0.000385705,8.70187e-07,2.21092e-09,0.0500977,0.000387452,8.76819e-07,-5.41339e-10,0.050486,0.000389204,8.75195e-07,-4.5361e-11,0.0508761,0.000390954,8.75059e-07,7.22669e-10,0.0512679,0.000392706,8.77227e-07,8.79936e-10,0.0516615,0.000394463,8.79867e-07,-5.17048e-10,0.0520568,0.000396222,8.78316e-07,1.18833e-09,0.0524539,0.000397982,8.81881e-07,-5.11022e-10,0.0528528,0.000399744,8.80348e-07,8.55683e-10,0.0532534,0.000401507,8.82915e-07,8.13562e-10,0.0536558,0.000403276,8.85356e-07,-3.84603e-10,0.05406,0.000405045,8.84202e-07,7.24962e-10,0.0544659,0.000406816,8.86377e-07,1.20986e-09,0.0548736,0.000408592,8.90006e-07,-1.83896e-09,0.0552831,0.000410367,8.84489e-07,2.42071e-09,0.0556944,0.000412143,8.91751e-07,-3.93413e-10,0.0561074,0.000413925,8.90571e-07,-8.46967e-10,0.0565222,0.000415704,8.8803e-07,3.78122e-09,0.0569388,0.000417491,8.99374e-07,-3.1021e-09,0.0573572,0.000419281,8.90068e-07,1.17658e-09,0.0577774,0.000421064,8.93597e-07,2.12117e-09,0.0581993,0.000422858,8.99961e-07,-2.21068e-09,0.0586231,0.000424651,8.93329e-07,2.9961e-09,0.0590486,0.000426447,9.02317e-07,-2.32311e-09,0.059476,0.000428244,8.95348e-07,2.57122e-09,0.0599051,0.000430043,9.03062e-07,-5.11098e-10,0.0603361,0.000431847,9.01528e-07,-5.27166e-10,0.0607688,0.000433649,8.99947e-07,2.61984e-09,0.0612034,0.000435457,9.07806e-07,-2.50141e-09,0.0616397,0.000437265,9.00302e-07,3.66045e-09,0.0620779,0.000439076,9.11283e-07,-4.68977e-09,0.0625179,0.000440885,8.97214e-07,7.64783e-09,0.0629597,0.000442702,9.20158e-07,-7.27499e-09,0.0634033,0.000444521,8.98333e-07,6.55113e-09,0.0638487,0.000446337,9.17986e-07,-4.02844e-09,0.0642959,0.000448161,9.05901e-07,2.11196e-09,0.064745,0.000449979,9.12236e-07,3.03125e-09,0.0651959,0.000451813,9.2133e-07,-6.78648e-09,0.0656486,0.000453635,9.00971e-07,9.21375e-09,0.0661032,0.000455464,9.28612e-07,-7.71684e-09,0.0665596,0.000457299,9.05462e-07,6.7522e-09,0.0670178,0.00045913,9.25718e-07,-4.3907e-09,0.0674778,0.000460968,9.12546e-07,3.36e-09,0.0679397,0.000462803,9.22626e-07,-1.59876e-09,0.0684034,0.000464644,9.1783e-07,3.0351e-09,0.068869,0.000466488,9.26935e-07,-3.09101e-09,0.0693364,0.000468333,9.17662e-07,1.8785e-09,0.0698057,0.000470174,9.23298e-07,3.02733e-09,0.0702768,0.00047203,9.3238e-07,-6.53722e-09,0.0707497,0.000473875,9.12768e-07,8.22054e-09,0.0712245,0.000475725,9.37429e-07,-3.99325e-09,0.0717012,0.000477588,9.2545e-07,3.01839e-10,0.0721797,0.00047944,9.26355e-07,2.78597e-09,0.0726601,0.000481301,9.34713e-07,-3.99507e-09,0.0731423,0.000483158,9.22728e-07,5.7435e-09,0.0736264,0.000485021,9.39958e-07,-4.07776e-09,0.0741123,0.000486888,9.27725e-07,3.11695e-09,0.0746002,0.000488753,9.37076e-07,-9.39394e-10,0.0750898,0.000490625,9.34258e-07,6.4055e-10,0.0755814,0.000492495,9.3618e-07,-1.62265e-09,0.0760748,0.000494363,9.31312e-07,5.84995e-09,0.0765701,0.000496243,9.48861e-07,-6.87601e-09,0.0770673,0.00049812,9.28233e-07,6.75296e-09,0.0775664,0.000499997,9.48492e-07,-5.23467e-09,0.0780673,0.000501878,9.32788e-07,6.73523e-09,0.0785701,0.000503764,9.52994e-07,-6.80514e-09,0.0790748,0.000505649,9.32578e-07,5.5842e-09,0.0795814,0.000507531,9.49331e-07,-6.30583e-10,0.0800899,0.000509428,9.47439e-07,-3.0618e-09,0.0806003,0.000511314,9.38254e-07,5.4273e-09,0.0811125,0.000513206,9.54536e-07,-3.74627e-09,0.0816267,0.000515104,9.43297e-07,2.10713e-09,0.0821427,0.000516997,9.49618e-07,2.76839e-09,0.0826607,0.000518905,9.57924e-07,-5.73006e-09,0.0831805,0.000520803,9.40733e-07,5.25072e-09,0.0837023,0.0005227,9.56486e-07,-3.71718e-10,0.084226,0.000524612,9.5537e-07,-3.76404e-09,0.0847515,0.000526512,9.44078e-07,7.97735e-09,0.085279,0.000528424,9.6801e-07,-5.79367e-09,0.0858084,0.000530343,9.50629e-07,2.96268e-10,0.0863397,0.000532245,9.51518e-07,4.6086e-09,0.0868729,0.000534162,9.65344e-07,-3.82947e-09,0.087408,0.000536081,9.53856e-07,3.25861e-09,0.087945,0.000537998,9.63631e-07,-1.7543e-09,0.088484,0.00053992,9.58368e-07,3.75849e-09,0.0890249,0.000541848,9.69644e-07,-5.82891e-09,0.0895677,0.00054377,9.52157e-07,4.65593e-09,0.0901124,0.000545688,9.66125e-07,2.10643e-09,0.0906591,0.000547627,9.72444e-07,-5.63099e-09,0.0912077,0.000549555,9.55551e-07,5.51627e-09,0.0917582,0.000551483,9.721e-07,-1.53292e-09,0.0923106,0.000553422,9.67501e-07,6.15311e-10,0.092865,0.000555359,9.69347e-07,-9.28291e-10,0.0934213,0.000557295,9.66562e-07,3.09774e-09,0.0939796,0.000559237,9.75856e-07,-4.01186e-09,0.0945398,0.000561177,9.6382e-07,5.49892e-09,0.095102,0.000563121,9.80317e-07,-3.08258e-09,0.0956661,0.000565073,9.71069e-07,-6.19176e-10,0.0962321,0.000567013,9.69212e-07,5.55932e-09,0.0968001,0.000568968,9.8589e-07,-6.71704e-09,0.09737,0.00057092,9.65738e-07,6.40762e-09,0.0979419,0.00057287,9.84961e-07,-4.0122e-09,0.0985158,0.000574828,9.72925e-07,2.19059e-09,0.0990916,0.000576781,9.79496e-07,2.70048e-09,0.0996693,0.000578748,9.87598e-07,-5.54193e-09,0.100249,0.000580706,9.70972e-07,4.56597e-09,0.100831,0.000582662,9.8467e-07,2.17923e-09,0.101414,0.000584638,9.91208e-07,-5.83232e-09,0.102,0.000586603,9.73711e-07,6.24884e-09,0.102588,0.000588569,9.92457e-07,-4.26178e-09,0.103177,0.000590541,9.79672e-07,3.34781e-09,0.103769,0.00059251,9.89715e-07,-1.67904e-09,0.104362,0.000594485,9.84678e-07,3.36839e-09,0.104958,0.000596464,9.94783e-07,-4.34397e-09,0.105555,0.000598441,9.81751e-07,6.55696e-09,0.106155,0.000600424,1.00142e-06,-6.98272e-09,0.106756,0.000602406,9.80474e-07,6.4728e-09,0.107359,0.000604386,9.99893e-07,-4.00742e-09,0.107965,0.000606374,9.8787e-07,2.10654e-09,0.108572,0.000608356,9.9419e-07,3.0318e-09,0.109181,0.000610353,1.00329e-06,-6.7832e-09,0.109793,0.00061234,9.82936e-07,9.1998e-09,0.110406,0.000614333,1.01054e-06,-7.6642e-09,0.111021,0.000616331,9.87543e-07,6.55579e-09,0.111639,0.000618326,1.00721e-06,-3.65791e-09,0.112258,0.000620329,9.96236e-07,6.25467e-10,0.112879,0.000622324,9.98113e-07,1.15593e-09,0.113503,0.000624323,1.00158e-06,2.20158e-09,0.114128,0.000626333,1.00819e-06,-2.51191e-09,0.114755,0.000628342,1.00065e-06,3.95517e-10,0.115385,0.000630345,1.00184e-06,9.29807e-10,0.116016,0.000632351,1.00463e-06,3.33599e-09,0.116649,0.00063437,1.01463e-06,-6.82329e-09,0.117285,0.000636379,9.94163e-07,9.05595e-09,0.117922,0.000638395,1.02133e-06,-7.04862e-09,0.118562,0.000640416,1.00019e-06,4.23737e-09,0.119203,0.000642429,1.0129e-06,-2.45033e-09,0.119847,0.000644448,1.00555e-06,5.56395e-09,0.120492,0.000646475,1.02224e-06,-4.9043e-09,0.121139,0.000648505,1.00753e-06,-8.47952e-10,0.121789,0.000650518,1.00498e-06,8.29622e-09,0.122441,0.000652553,1.02987e-06,-9.98538e-09,0.123094,0.000654582,9.99914e-07,9.2936e-09,0.12375,0.00065661,1.02779e-06,-4.83707e-09,0.124407,0.000658651,1.01328e-06,2.60411e-09,0.125067,0.000660685,1.0211e-06,-5.57945e-09,0.125729,0.000662711,1.00436e-06,1.22631e-08,0.126392,0.000664756,1.04115e-06,-1.36704e-08,0.127058,0.000666798,1.00014e-06,1.26161e-08,0.127726,0.000668836,1.03798e-06,-6.99155e-09,0.128396,0.000670891,1.01701e-06,4.48836e-10,0.129068,0.000672926,1.01836e-06,5.19606e-09,0.129742,0.000674978,1.03394e-06,-6.3319e-09,0.130418,0.000677027,1.01495e-06,5.2305e-09,0.131096,0.000679073,1.03064e-06,3.11123e-10,0.131776,0.000681135,1.03157e-06,-6.47511e-09,0.132458,0.000683179,1.01215e-06,1.06882e-08,0.133142,0.000685235,1.04421e-06,-6.47519e-09,0.133829,0.000687304,1.02479e-06,3.11237e-10,0.134517,0.000689355,1.02572e-06,5.23035e-09,0.135207,0.000691422,1.04141e-06,-6.3316e-09,0.1359,0.000693486,1.02242e-06,5.19484e-09,0.136594,0.000695546,1.038e-06,4.53497e-10,0.137291,0.000697623,1.03936e-06,-7.00891e-09,0.137989,0.000699681,1.01834e-06,1.2681e-08,0.13869,0.000701756,1.05638e-06,-1.39128e-08,0.139393,0.000703827,1.01464e-06,1.31679e-08,0.140098,0.000705896,1.05414e-06,-8.95659e-09,0.140805,0.000707977,1.02727e-06,7.75742e-09,0.141514,0.000710055,1.05055e-06,-7.17182e-09,0.142225,0.000712135,1.02903e-06,6.02862e-09,0.142938,0.000714211,1.04712e-06,-2.04163e-09,0.143653,0.000716299,1.04099e-06,2.13792e-09,0.144371,0.000718387,1.04741e-06,-6.51009e-09,0.14509,0.000720462,1.02787e-06,9.00123e-09,0.145812,0.000722545,1.05488e-06,3.07523e-10,0.146535,0.000724656,1.0558e-06,-1.02312e-08,0.147261,0.000726737,1.02511e-06,1.0815e-08,0.147989,0.000728819,1.05755e-06,-3.22681e-09,0.148719,0.000730925,1.04787e-06,2.09244e-09,0.14945,0.000733027,1.05415e-06,-5.143e-09,0.150185,0.00073512,1.03872e-06,3.57844e-09,0.150921,0.000737208,1.04946e-06,5.73027e-09,0.151659,0.000739324,1.06665e-06,-1.15983e-08,0.152399,0.000741423,1.03185e-06,1.08605e-08,0.153142,0.000743519,1.06443e-06,-2.04106e-09,0.153886,0.000745642,1.05831e-06,-2.69642e-09,0.154633,0.00074775,1.05022e-06,-2.07425e-09,0.155382,0.000749844,1.044e-06,1.09934e-08,0.156133,0.000751965,1.07698e-06,-1.20972e-08,0.156886,0.000754083,1.04069e-06,7.59288e-09,0.157641,0.000756187,1.06347e-06,-3.37305e-09,0.158398,0.000758304,1.05335e-06,5.89921e-09,0.159158,0.000760428,1.07104e-06,-5.32248e-09,0.159919,0.000762554,1.05508e-06,4.8927e-10,0.160683,0.000764666,1.05654e-06,3.36547e-09,0.161448,0.000766789,1.06664e-06,9.50081e-10,0.162216,0.000768925,1.06949e-06,-7.16568e-09,0.162986,0.000771043,1.04799e-06,1.28114e-08,0.163758,0.000773177,1.08643e-06,-1.42774e-08,0.164533,0.000775307,1.0436e-06,1.44956e-08,0.165309,0.000777438,1.08708e-06,-1.39025e-08,0.166087,0.00077957,1.04538e-06,1.13118e-08,0.166868,0.000781695,1.07931e-06,-1.54224e-09,0.167651,0.000783849,1.07468e-06,-5.14312e-09,0.168436,0.000785983,1.05925e-06,7.21381e-09,0.169223,0.000788123,1.0809e-06,-8.81096e-09,0.170012,0.000790259,1.05446e-06,1.31289e-08,0.170803,0.000792407,1.09385e-06,-1.39022e-08,0.171597,0.000794553,1.05214e-06,1.26775e-08,0.172392,0.000796695,1.09018e-06,-7.00557e-09,0.17319,0.000798855,1.06916e-06,4.43796e-10,0.17399,0.000800994,1.07049e-06,5.23031e-09,0.174792,0.000803151,1.08618e-06,-6.46397e-09,0.175596,0.000805304,1.06679e-06,5.72444e-09,0.176403,0.000807455,1.08396e-06,-1.53254e-09,0.177211,0.000809618,1.07937e-06,4.05673e-10,0.178022,0.000811778,1.08058e-06,-9.01916e-11,0.178835,0.000813939,1.08031e-06,-4.49821e-11,0.17965,0.000816099,1.08018e-06,2.70234e-10,0.180467,0.00081826,1.08099e-06,-1.03603e-09,0.181286,0.000820419,1.07788e-06,3.87392e-09,0.182108,0.000822587,1.0895e-06,4.41522e-10,0.182932,0.000824767,1.09083e-06,-5.63997e-09,0.183758,0.000826932,1.07391e-06,7.21707e-09,0.184586,0.000829101,1.09556e-06,-8.32718e-09,0.185416,0.000831267,1.07058e-06,1.11907e-08,0.186248,0.000833442,1.10415e-06,-6.63336e-09,0.187083,0.00083563,1.08425e-06,4.41484e-10,0.187919,0.0008378,1.08557e-06,4.86754e-09,0.188758,0.000839986,1.10017e-06,-5.01041e-09,0.189599,0.000842171,1.08514e-06,2.72811e-10,0.190443,0.000844342,1.08596e-06,3.91916e-09,0.191288,0.000846526,1.09772e-06,-1.04819e-09,0.192136,0.000848718,1.09457e-06,2.73531e-10,0.192985,0.000850908,1.0954e-06,-4.58916e-11,0.193837,0.000853099,1.09526e-06,-9.01158e-11,0.194692,0.000855289,1.09499e-06,4.06506e-10,0.195548,0.00085748,1.09621e-06,-1.53595e-09,0.196407,0.000859668,1.0916e-06,5.73717e-09,0.197267,0.000861869,1.10881e-06,-6.51164e-09,0.19813,0.000864067,1.08928e-06,5.40831e-09,0.198995,0.000866261,1.1055e-06,-2.20401e-10,0.199863,0.000868472,1.10484e-06,-4.52652e-09,0.200732,0.000870668,1.09126e-06,3.42508e-09,0.201604,0.000872861,1.10153e-06,5.72762e-09,0.202478,0.000875081,1.11872e-06,-1.14344e-08,0.203354,0.000877284,1.08441e-06,1.02076e-08,0.204233,0.000879484,1.11504e-06,4.06355e-10,0.205113,0.000881715,1.11626e-06,-1.18329e-08,0.205996,0.000883912,1.08076e-06,1.71227e-08,0.206881,0.000886125,1.13213e-06,-1.19546e-08,0.207768,0.000888353,1.09626e-06,8.93465e-10,0.208658,0.000890548,1.09894e-06,8.38062e-09,0.209549,0.000892771,1.12408e-06,-4.61353e-09,0.210443,0.000895006,1.11024e-06,-4.82756e-09,0.211339,0.000897212,1.09576e-06,9.02245e-09,0.212238,0.00089943,1.12283e-06,-1.45997e-09,0.213138,0.000901672,1.11845e-06,-3.18255e-09,0.214041,0.000903899,1.1089e-06,-7.11073e-10,0.214946,0.000906115,1.10677e-06,6.02692e-09,0.215853,0.000908346,1.12485e-06,-8.49548e-09,0.216763,0.00091057,1.09936e-06,1.30537e-08,0.217675,0.000912808,1.13852e-06,-1.3917e-08,0.218588,0.000915044,1.09677e-06,1.28121e-08,0.219505,0.000917276,1.13521e-06,-7.5288e-09,0.220423,0.000919523,1.11262e-06,2.40205e-09,0.221344,0.000921756,1.11983e-06,-2.07941e-09,0.222267,0.000923989,1.11359e-06,5.91551e-09,0.223192,0.000926234,1.13134e-06,-6.68149e-09,0.224119,0.000928477,1.11129e-06,5.90929e-09,0.225049,0.000930717,1.12902e-06,-2.05436e-09,0.22598,0.000932969,1.12286e-06,2.30807e-09,0.226915,0.000935222,1.12978e-06,-7.17796e-09,0.227851,0.00093746,1.10825e-06,1.15028e-08,0.228789,0.000939711,1.14276e-06,-9.03083e-09,0.22973,0.000941969,1.11566e-06,9.71932e-09,0.230673,0.00094423,1.14482e-06,-1.49452e-08,0.231619,0.000946474,1.09998e-06,2.02591e-08,0.232566,0.000948735,1.16076e-06,-2.13879e-08,0.233516,0.000950993,1.0966e-06,2.05888e-08,0.234468,0.000953247,1.15837e-06,-1.62642e-08,0.235423,0.000955515,1.10957e-06,1.46658e-08,0.236379,0.000957779,1.15357e-06,-1.25966e-08,0.237338,0.000960048,1.11578e-06,5.91793e-09,0.238299,0.000962297,1.13353e-06,3.82602e-09,0.239263,0.000964576,1.14501e-06,-6.3208e-09,0.240229,0.000966847,1.12605e-06,6.55613e-09,0.241197,0.000969119,1.14572e-06,-5.00268e-09,0.242167,0.000971395,1.13071e-06,-1.44659e-09,0.243139,0.000973652,1.12637e-06,1.07891e-08,0.244114,0.000975937,1.15874e-06,-1.19073e-08,0.245091,0.000978219,1.12302e-06,7.03782e-09,0.246071,0.000980486,1.14413e-06,-1.34276e-09,0.247052,0.00098277,1.1401e-06,-1.66669e-09,0.248036,0.000985046,1.1351e-06,8.00935e-09,0.249022,0.00098734,1.15913e-06,-1.54694e-08,0.250011,0.000989612,1.11272e-06,2.4066e-08,0.251002,0.000991909,1.18492e-06,-2.11901e-08,0.251995,0.000994215,1.12135e-06,1.08973e-09,0.25299,0.000996461,1.12462e-06,1.68311e-08,0.253988,0.000998761,1.17511e-06,-8.8094e-09,0.254987,0.00100109,1.14868e-06,-1.13958e-08,0.25599,0.00100335,1.1145e-06,2.45902e-08,0.256994,0.00100565,1.18827e-06,-2.73603e-08,0.258001,0.00100795,1.10618e-06,2.52464e-08,0.25901,0.00101023,1.18192e-06,-1.40207e-08,0.260021,0.00101256,1.13986e-06,1.03387e-09,0.261035,0.00101484,1.14296e-06,9.8853e-09,0.262051,0.00101715,1.17262e-06,-1.07726e-08,0.263069,0.00101947,1.1403e-06,3.40272e-09,0.26409,0.00102176,1.15051e-06,-2.83827e-09,0.265113,0.00102405,1.142e-06,7.95039e-09,0.266138,0.00102636,1.16585e-06,8.39047e-10,0.267166,0.00102869,1.16836e-06,-1.13066e-08,0.268196,0.00103099,1.13444e-06,1.4585e-08,0.269228,0.00103331,1.1782e-06,-1.72314e-08,0.270262,0.00103561,1.1265e-06,2.45382e-08,0.271299,0.00103794,1.20012e-06,-2.13166e-08,0.272338,0.00104028,1.13617e-06,1.12364e-09,0.273379,0.00104255,1.13954e-06,1.68221e-08,0.274423,0.00104488,1.19001e-06,-8.80736e-09,0.275469,0.00104723,1.16358e-06,-1.13948e-08,0.276518,0.00104953,1.1294e-06,2.45839e-08,0.277568,0.00105186,1.20315e-06,-2.73361e-08,0.278621,0.00105418,1.12114e-06,2.51559e-08,0.279677,0.0010565,1.19661e-06,-1.36832e-08,0.280734,0.00105885,1.15556e-06,-2.25706e-10,0.281794,0.00106116,1.15488e-06,1.45862e-08,0.282857,0.00106352,1.19864e-06,-2.83167e-08,0.283921,0.00106583,1.11369e-06,3.90759e-08,0.284988,0.00106817,1.23092e-06,-3.85801e-08,0.286058,0.00107052,1.11518e-06,2.58375e-08,0.287129,0.00107283,1.19269e-06,-5.16498e-09,0.288203,0.0010752,1.1772e-06,-5.17768e-09,0.28928,0.00107754,1.16167e-06,-3.92671e-09,0.290358,0.00107985,1.14988e-06,2.08846e-08,0.29144,0.00108221,1.21254e-06,-2.00072e-08,0.292523,0.00108458,1.15252e-06,-4.60659e-10,0.293609,0.00108688,1.15114e-06,2.18499e-08,0.294697,0.00108925,1.21669e-06,-2.73343e-08,0.295787,0.0010916,1.13468e-06,2.78826e-08,0.29688,0.00109395,1.21833e-06,-2.45915e-08,0.297975,0.00109632,1.14456e-06,1.08787e-08,0.299073,0.00109864,1.17719e-06,1.08788e-08,0.300172,0.00110102,1.20983e-06,-2.45915e-08,0.301275,0.00110337,1.13605e-06,2.78828e-08,0.302379,0.00110573,1.2197e-06,-2.73348e-08,0.303486,0.00110808,1.1377e-06,2.18518e-08,0.304595,0.00111042,1.20325e-06,-4.67556e-10,0.305707,0.00111283,1.20185e-06,-1.99816e-08,0.306821,0.00111517,1.14191e-06,2.07891e-08,0.307937,0.00111752,1.20427e-06,-3.57026e-09,0.309056,0.00111992,1.19356e-06,-6.50797e-09,0.310177,0.00112228,1.17404e-06,-2.00165e-10,0.3113,0.00112463,1.17344e-06,7.30874e-09,0.312426,0.001127,1.19536e-06,7.67424e-10,0.313554,0.00112939,1.19767e-06,-1.03784e-08,0.314685,0.00113176,1.16653e-06,1.09437e-08,0.315818,0.00113412,1.19936e-06,-3.59406e-09,0.316953,0.00113651,1.18858e-06,3.43251e-09,0.318091,0.0011389,1.19888e-06,-1.0136e-08,0.319231,0.00114127,1.16847e-06,7.30915e-09,0.320374,0.00114363,1.1904e-06,1.07018e-08,0.321518,0.00114604,1.2225e-06,-2.03137e-08,0.322666,0.00114842,1.16156e-06,1.09484e-08,0.323815,0.00115078,1.19441e-06,6.32224e-09,0.324967,0.00115319,1.21337e-06,-6.43509e-09,0.326122,0.00115559,1.19407e-06,-1.03842e-08,0.327278,0.00115795,1.16291e-06,1.81697e-08,0.328438,0.00116033,1.21742e-06,-2.6901e-09,0.329599,0.00116276,1.20935e-06,-7.40939e-09,0.330763,0.00116515,1.18713e-06,2.52533e-09,0.331929,0.00116754,1.1947e-06,-2.69191e-09,0.333098,0.00116992,1.18663e-06,8.24218e-09,0.334269,0.00117232,1.21135e-06,-4.74377e-10,0.335443,0.00117474,1.20993e-06,-6.34471e-09,0.336619,0.00117714,1.1909e-06,-3.94922e-09,0.337797,0.00117951,1.17905e-06,2.21417e-08,0.338978,0.00118193,1.24547e-06,-2.50128e-08,0.340161,0.00118435,1.17043e-06,1.8305e-08,0.341346,0.00118674,1.22535e-06,-1.84048e-08,0.342534,0.00118914,1.17013e-06,2.55121e-08,0.343725,0.00119156,1.24667e-06,-2.40389e-08,0.344917,0.00119398,1.17455e-06,1.10389e-08,0.346113,0.00119636,1.20767e-06,9.68574e-09,0.34731,0.0011988,1.23673e-06,-1.99797e-08,0.34851,0.00120122,1.17679e-06,1.06284e-08,0.349713,0.0012036,1.20867e-06,7.26868e-09,0.350917,0.00120604,1.23048e-06,-9.90072e-09,0.352125,0.00120847,1.20078e-06,2.53177e-09,0.353334,0.00121088,1.20837e-06,-2.26199e-10,0.354546,0.0012133,1.20769e-06,-1.62705e-09,0.355761,0.00121571,1.20281e-06,6.73435e-09,0.356978,0.00121813,1.22302e-06,4.49207e-09,0.358197,0.00122059,1.23649e-06,-2.47027e-08,0.359419,0.00122299,1.16238e-06,3.47142e-08,0.360643,0.00122542,1.26653e-06,-2.47472e-08,0.36187,0.00122788,1.19229e-06,4.66965e-09,0.363099,0.00123028,1.20629e-06,6.06872e-09,0.36433,0.00123271,1.2245e-06,8.57729e-10,0.365564,0.00123516,1.22707e-06,-9.49952e-09,0.366801,0.00123759,1.19858e-06,7.33792e-09,0.36804,0.00124001,1.22059e-06,9.95025e-09,0.369281,0.00124248,1.25044e-06,-1.73366e-08,0.370525,0.00124493,1.19843e-06,-2.08464e-10,0.371771,0.00124732,1.1978e-06,1.81704e-08,0.373019,0.00124977,1.25232e-06,-1.28683e-08,0.37427,0.00125224,1.21371e-06,3.50042e-09,0.375524,0.00125468,1.22421e-06,-1.1335e-09,0.37678,0.00125712,1.22081e-06,1.03345e-09,0.378038,0.00125957,1.22391e-06,-3.00023e-09,0.379299,0.00126201,1.21491e-06,1.09676e-08,0.380562,0.00126447,1.24781e-06,-1.10676e-08,0.381828,0.00126693,1.21461e-06,3.50042e-09,0.383096,0.00126937,1.22511e-06,-2.93403e-09,0.384366,0.00127181,1.21631e-06,8.23574e-09,0.385639,0.00127427,1.24102e-06,-2.06607e-10,0.386915,0.00127675,1.2404e-06,-7.40935e-09,0.388193,0.00127921,1.21817e-06,4.1761e-11,0.389473,0.00128165,1.21829e-06,7.24223e-09,0.390756,0.0012841,1.24002e-06,7.91564e-10,0.392042,0.00128659,1.2424e-06,-1.04086e-08,0.393329,0.00128904,1.21117e-06,1.10405e-08,0.39462,0.0012915,1.24429e-06,-3.951e-09,0.395912,0.00129397,1.23244e-06,4.7634e-09,0.397208,0.00129645,1.24673e-06,-1.51025e-08,0.398505,0.0012989,1.20142e-06,2.58443e-08,0.399805,0.00130138,1.27895e-06,-2.86702e-08,0.401108,0.00130385,1.19294e-06,2.92318e-08,0.402413,0.00130632,1.28064e-06,-2.86524e-08,0.403721,0.0013088,1.19468e-06,2.57731e-08,0.405031,0.00131127,1.272e-06,-1.48355e-08,0.406343,0.00131377,1.2275e-06,3.76652e-09,0.407658,0.00131623,1.23879e-06,-2.30784e-10,0.408976,0.00131871,1.2381e-06,-2.84331e-09,0.410296,0.00132118,1.22957e-06,1.16041e-08,0.411618,0.00132367,1.26438e-06,-1.37708e-08,0.412943,0.00132616,1.22307e-06,1.36768e-08,0.41427,0.00132865,1.2641e-06,-1.1134e-08,0.4156,0.00133114,1.2307e-06,1.05714e-09,0.416933,0.00133361,1.23387e-06,6.90538e-09,0.418267,0.00133609,1.25459e-06,1.12372e-09,0.419605,0.00133861,1.25796e-06,-1.14002e-08,0.420945,0.00134109,1.22376e-06,1.46747e-08,0.422287,0.00134358,1.26778e-06,-1.7496e-08,0.423632,0.00134606,1.21529e-06,2.5507e-08,0.424979,0.00134857,1.29182e-06,-2.49272e-08,0.426329,0.00135108,1.21703e-06,1.45972e-08,0.427681,0.00135356,1.26083e-06,-3.65935e-09,0.429036,0.00135607,1.24985e-06,4.00178e-11,0.430393,0.00135857,1.24997e-06,3.49917e-09,0.431753,0.00136108,1.26047e-06,-1.40366e-08,0.433116,0.00136356,1.21836e-06,2.28448e-08,0.43448,0.00136606,1.28689e-06,-1.77378e-08,0.435848,0.00136858,1.23368e-06,1.83043e-08,0.437218,0.0013711,1.28859e-06,-2.56769e-08,0.43859,0.0013736,1.21156e-06,2.47987e-08,0.439965,0.0013761,1.28595e-06,-1.39133e-08,0.441342,0.00137863,1.24421e-06,1.05202e-09,0.442722,0.00138112,1.24737e-06,9.70507e-09,0.444104,0.00138365,1.27649e-06,-1.00698e-08,0.445489,0.00138617,1.24628e-06,7.72123e-10,0.446877,0.00138867,1.24859e-06,6.98132e-09,0.448267,0.00139118,1.26954e-06,1.10477e-09,0.449659,0.00139373,1.27285e-06,-1.14003e-08,0.451054,0.00139624,1.23865e-06,1.4694e-08,0.452452,0.00139876,1.28273e-06,-1.75734e-08,0.453852,0.00140127,1.23001e-06,2.5797e-08,0.455254,0.00140381,1.3074e-06,-2.60097e-08,0.456659,0.00140635,1.22937e-06,1.86371e-08,0.458067,0.00140886,1.28529e-06,-1.8736e-08,0.459477,0.00141137,1.22908e-06,2.65048e-08,0.46089,0.00141391,1.30859e-06,-2.76784e-08,0.462305,0.00141645,1.22556e-06,2.46043e-08,0.463722,0.00141897,1.29937e-06,-1.11341e-08,0.465143,0.00142154,1.26597e-06,-9.87033e-09,0.466565,0.00142404,1.23636e-06,2.08131e-08,0.467991,0.00142657,1.2988e-06,-1.37773e-08,0.469419,0.00142913,1.25746e-06,4.49378e-09,0.470849,0.00143166,1.27094e-06,-4.19781e-09,0.472282,0.00143419,1.25835e-06,1.22975e-08,0.473717,0.00143674,1.29524e-06,-1.51902e-08,0.475155,0.00143929,1.24967e-06,1.86608e-08,0.476596,0.00144184,1.30566e-06,-2.96506e-08,0.478039,0.00144436,1.2167e-06,4.03368e-08,0.479485,0.00144692,1.33771e-06,-4.22896e-08,0.480933,0.00144947,1.21085e-06,3.94148e-08,0.482384,0.00145201,1.32909e-06,-2.59626e-08,0.483837,0.00145459,1.2512e-06,4.83124e-09,0.485293,0.0014571,1.2657e-06,6.63757e-09,0.486751,0.00145966,1.28561e-06,-1.57911e-09,0.488212,0.00146222,1.28087e-06,-3.21468e-10,0.489676,0.00146478,1.27991e-06,2.86517e-09,0.491142,0.00146735,1.2885e-06,-1.11392e-08,0.49261,0.00146989,1.25508e-06,1.18893e-08,0.494081,0.00147244,1.29075e-06,-6.61574e-09,0.495555,0.001475,1.27091e-06,1.45736e-08,0.497031,0.00147759,1.31463e-06,-2.18759e-08,0.49851,0.00148015,1.249e-06,1.33252e-08,0.499992,0.00148269,1.28897e-06,-1.62277e-09,0.501476,0.00148526,1.28411e-06,-6.83421e-09,0.502962,0.00148781,1.2636e-06,2.89596e-08,0.504451,0.00149042,1.35048e-06,-4.93997e-08,0.505943,0.00149298,1.20228e-06,4.94299e-08,0.507437,0.00149553,1.35057e-06,-2.91107e-08,0.508934,0.00149814,1.26324e-06,7.40848e-09,0.510434,0.00150069,1.28547e-06,-5.23187e-10,0.511936,0.00150326,1.2839e-06,-5.31585e-09,0.51344,0.00150581,1.26795e-06,2.17866e-08,0.514947,0.00150841,1.33331e-06,-2.22257e-08,0.516457,0.00151101,1.26663e-06,7.51178e-09,0.517969,0.00151357,1.28917e-06,-7.82128e-09,0.519484,0.00151613,1.2657e-06,2.37733e-08,0.521002,0.00151873,1.33702e-06,-2.76674e-08,0.522522,0.00152132,1.25402e-06,2.72917e-08,0.524044,0.00152391,1.3359e-06,-2.18949e-08,0.525569,0.00152652,1.27021e-06,6.83372e-10,0.527097,0.00152906,1.27226e-06,1.91613e-08,0.528628,0.00153166,1.32974e-06,-1.77241e-08,0.53016,0.00153427,1.27657e-06,-7.86963e-09,0.531696,0.0015368,1.25296e-06,4.92027e-08,0.533234,0.00153945,1.40057e-06,-6.9732e-08,0.534775,0.00154204,1.19138e-06,5.09114e-08,0.536318,0.00154458,1.34411e-06,-1.4704e-08,0.537864,0.00154722,1.3e-06,7.9048e-09,0.539413,0.00154984,1.32371e-06,-1.69152e-08,0.540964,0.00155244,1.27297e-06,1.51355e-10,0.542517,0.00155499,1.27342e-06,1.63099e-08,0.544074,0.00155758,1.32235e-06,-5.78647e-09,0.545633,0.00156021,1.30499e-06,6.83599e-09,0.547194,0.00156284,1.3255e-06,-2.15575e-08,0.548758,0.00156543,1.26083e-06,1.97892e-08,0.550325,0.00156801,1.32019e-06,2.00525e-09,0.551894,0.00157065,1.32621e-06,-2.78103e-08,0.553466,0.00157322,1.24278e-06,4.96314e-08,0.555041,0.00157586,1.39167e-06,-5.1506e-08,0.556618,0.00157849,1.23716e-06,3.71835e-08,0.558198,0.00158107,1.34871e-06,-3.76233e-08,0.55978,0.00158366,1.23584e-06,5.37052e-08,0.561365,0.00158629,1.39695e-06,-5.79884e-08,0.562953,0.00158891,1.22299e-06,5.90392e-08,0.564543,0.00159153,1.4001e-06,-5.89592e-08,0.566136,0.00159416,1.22323e-06,5.7588e-08,0.567731,0.00159678,1.39599e-06,-5.21835e-08,0.569329,0.00159941,1.23944e-06,3.19369e-08,0.57093,0.00160199,1.33525e-06,-1.59594e-08,0.572533,0.00160461,1.28737e-06,3.19006e-08,0.574139,0.00160728,1.38307e-06,-5.20383e-08,0.575748,0.00160989,1.22696e-06,5.70431e-08,0.577359,0.00161251,1.39809e-06,-5.69247e-08,0.578973,0.00161514,1.22731e-06,5.14463e-08,0.580589,0.00161775,1.38165e-06,-2.9651e-08,0.582208,0.00162042,1.2927e-06,7.55339e-09,0.58383,0.00162303,1.31536e-06,-5.62636e-10,0.585455,0.00162566,1.31367e-06,-5.30281e-09,0.587081,0.00162827,1.29776e-06,2.17738e-08,0.588711,0.00163093,1.36309e-06,-2.21875e-08,0.590343,0.00163359,1.29652e-06,7.37164e-09,0.591978,0.00163621,1.31864e-06,-7.29907e-09,0.593616,0.00163882,1.29674e-06,2.18247e-08,0.595256,0.00164148,1.36221e-06,-2.03952e-08,0.596899,0.00164414,1.30103e-06,1.51241e-10,0.598544,0.00164675,1.30148e-06,1.97902e-08,0.600192,0.00164941,1.36085e-06,-1.97074e-08,0.601843,0.00165207,1.30173e-06,-5.65175e-10,0.603496,0.00165467,1.30004e-06,2.1968e-08,0.605152,0.00165734,1.36594e-06,-2.77024e-08,0.606811,0.00165999,1.28283e-06,2.92369e-08,0.608472,0.00166264,1.37054e-06,-2.96407e-08,0.610136,0.00166529,1.28162e-06,2.97215e-08,0.611803,0.00166795,1.37079e-06,-2.96408e-08,0.613472,0.0016706,1.28186e-06,2.92371e-08,0.615144,0.00167325,1.36957e-06,-2.77031e-08,0.616819,0.00167591,1.28647e-06,2.19708e-08,0.618496,0.00167855,1.35238e-06,-5.75407e-10,0.620176,0.00168125,1.35065e-06,-1.9669e-08,0.621858,0.00168389,1.29164e-06,1.96468e-08,0.623544,0.00168653,1.35058e-06,6.86403e-10,0.625232,0.00168924,1.35264e-06,-2.23924e-08,0.626922,0.00169187,1.28547e-06,2.92788e-08,0.628615,0.00169453,1.3733e-06,-3.51181e-08,0.630311,0.00169717,1.26795e-06,5.15889e-08,0.63201,0.00169987,1.42272e-06,-5.2028e-08,0.633711,0.00170255,1.26663e-06,3.73139e-08,0.635415,0.0017052,1.37857e-06,-3.76227e-08,0.637121,0.00170784,1.2657e-06,5.35722e-08,0.63883,0.00171054,1.42642e-06,-5.74567e-08,0.640542,0.00171322,1.25405e-06,5.70456e-08,0.642257,0.0017159,1.42519e-06,-5.15163e-08,0.643974,0.00171859,1.27064e-06,2.98103e-08,0.645694,0.00172122,1.36007e-06,-8.12016e-09,0.647417,0.00172392,1.33571e-06,2.67039e-09,0.649142,0.0017266,1.34372e-06,-2.56152e-09,0.65087,0.00172928,1.33604e-06,7.57571e-09,0.6526,0.00173197,1.35876e-06,-2.77413e-08,0.654334,0.00173461,1.27554e-06,4.3785e-08,0.65607,0.00173729,1.40689e-06,-2.81896e-08,0.657808,0.00174002,1.32233e-06,9.36893e-09,0.65955,0.00174269,1.35043e-06,-9.28617e-09,0.661294,0.00174536,1.32257e-06,2.77757e-08,0.66304,0.00174809,1.4059e-06,-4.2212e-08,0.66479,0.00175078,1.27926e-06,2.1863e-08,0.666542,0.0017534,1.34485e-06,1.43648e-08,0.668297,0.00175613,1.38795e-06,-1.97177e-08,0.670054,0.00175885,1.3288e-06,4.90115e-09,0.671814,0.00176152,1.3435e-06,1.13232e-10,0.673577,0.00176421,1.34384e-06,-5.3542e-09,0.675343,0.00176688,1.32778e-06,2.13035e-08,0.677111,0.0017696,1.39169e-06,-2.02553e-08,0.678882,0.00177232,1.33092e-06,1.13005e-10,0.680656,0.00177499,1.33126e-06,1.98031e-08,0.682432,0.00177771,1.39067e-06,-1.97211e-08,0.684211,0.00178043,1.33151e-06,-5.2349e-10,0.685993,0.00178309,1.32994e-06,2.18151e-08,0.687777,0.00178582,1.39538e-06,-2.71325e-08,0.689564,0.00178853,1.31398e-06,2.71101e-08,0.691354,0.00179124,1.39531e-06,-2.17035e-08,0.693147,0.00179396,1.3302e-06,9.92865e-11,0.694942,0.00179662,1.3305e-06,2.13063e-08,0.69674,0.00179935,1.39442e-06,-2.57198e-08,0.698541,0.00180206,1.31726e-06,2.19682e-08,0.700344,0.00180476,1.38317e-06,-2.54852e-09,0.70215,0.00180752,1.37552e-06,-1.17741e-08,0.703959,0.00181023,1.3402e-06,-9.95999e-09,0.705771,0.00181288,1.31032e-06,5.16141e-08,0.707585,0.00181566,1.46516e-06,-7.72869e-08,0.709402,0.00181836,1.2333e-06,7.87197e-08,0.711222,0.00182106,1.46946e-06,-5.87781e-08,0.713044,0.00182382,1.29312e-06,3.71834e-08,0.714869,0.00182652,1.40467e-06,-3.03511e-08,0.716697,0.00182924,1.31362e-06,2.46161e-08,0.718528,0.00183194,1.38747e-06,-8.5087e-09,0.720361,0.00183469,1.36194e-06,9.41892e-09,0.722197,0.00183744,1.3902e-06,-2.91671e-08,0.724036,0.00184014,1.3027e-06,4.76448e-08,0.725878,0.00184288,1.44563e-06,-4.22028e-08,0.727722,0.00184565,1.31902e-06,1.95682e-09,0.729569,0.00184829,1.3249e-06,3.43754e-08,0.731419,0.00185104,1.42802e-06,-2.0249e-08,0.733271,0.00185384,1.36727e-06,-1.29838e-08,0.735126,0.00185654,1.32832e-06,1.25794e-08,0.736984,0.00185923,1.36606e-06,2.22711e-08,0.738845,0.00186203,1.43287e-06,-4.20594e-08,0.740708,0.00186477,1.3067e-06,2.67571e-08,0.742574,0.00186746,1.38697e-06,-5.36424e-09,0.744443,0.00187022,1.37087e-06,-5.30023e-09,0.746315,0.00187295,1.35497e-06,2.65653e-08,0.748189,0.00187574,1.43467e-06,-4.13564e-08,0.750066,0.00187848,1.3106e-06,1.9651e-08,0.751946,0.00188116,1.36955e-06,2.23572e-08,0.753828,0.00188397,1.43663e-06,-4.9475e-08,0.755714,0.00188669,1.2882e-06,5.63335e-08,0.757602,0.00188944,1.4572e-06,-5.66499e-08,0.759493,0.00189218,1.28725e-06,5.10567e-08,0.761386,0.00189491,1.44042e-06,-2.83677e-08,0.763283,0.00189771,1.35532e-06,2.80962e-09,0.765182,0.00190042,1.36375e-06,1.71293e-08,0.767083,0.0019032,1.41513e-06,-1.17221e-08,0.768988,0.001906,1.37997e-06,-2.98453e-08,0.770895,0.00190867,1.29043e-06,7.14987e-08,0.772805,0.00191146,1.50493e-06,-7.73354e-08,0.774718,0.00191424,1.27292e-06,5.90292e-08,0.776634,0.00191697,1.45001e-06,-3.9572e-08,0.778552,0.00191975,1.33129e-06,3.9654e-08,0.780473,0.00192253,1.45026e-06,-5.94395e-08,0.782397,0.00192525,1.27194e-06,7.88945e-08,0.784324,0.00192803,1.50862e-06,-7.73249e-08,0.786253,0.00193082,1.27665e-06,5.15913e-08,0.788185,0.00193352,1.43142e-06,-9.83099e-09,0.79012,0.00193636,1.40193e-06,-1.22672e-08,0.792058,0.00193912,1.36513e-06,-7.05275e-10,0.793999,0.00194185,1.36301e-06,1.50883e-08,0.795942,0.00194462,1.40828e-06,-4.33147e-11,0.797888,0.00194744,1.40815e-06,-1.49151e-08,0.799837,0.00195021,1.3634e-06,9.93244e-11,0.801788,0.00195294,1.3637e-06,1.45179e-08,0.803743,0.00195571,1.40725e-06,1.43363e-09,0.8057,0.00195853,1.41155e-06,-2.02525e-08,0.80766,0.00196129,1.35079e-06,1.99718e-08,0.809622,0.00196405,1.41071e-06,-3.01649e-11,0.811588,0.00196687,1.41062e-06,-1.9851e-08,0.813556,0.00196964,1.35107e-06,1.98296e-08,0.815527,0.0019724,1.41056e-06,1.37485e-10,0.817501,0.00197522,1.41097e-06,-2.03796e-08,0.819477,0.00197798,1.34983e-06,2.17763e-08,0.821457,0.00198074,1.41516e-06,-7.12085e-09,0.823439,0.00198355,1.3938e-06,6.70707e-09,0.825424,0.00198636,1.41392e-06,-1.97074e-08,0.827412,0.00198913,1.35479e-06,1.25179e-08,0.829402,0.00199188,1.39235e-06,2.92405e-08,0.831396,0.00199475,1.48007e-06,-6.98755e-08,0.833392,0.0019975,1.27044e-06,7.14477e-08,0.835391,0.00200026,1.48479e-06,-3.71014e-08,0.837392,0.00200311,1.37348e-06,1.73533e-08,0.839397,0.00200591,1.42554e-06,-3.23118e-08,0.841404,0.00200867,1.32861e-06,5.2289e-08,0.843414,0.00201148,1.48547e-06,-5.76348e-08,0.845427,0.00201428,1.31257e-06,5.9041e-08,0.847443,0.00201708,1.48969e-06,-5.93197e-08,0.849461,0.00201988,1.31173e-06,5.90289e-08,0.851482,0.00202268,1.48882e-06,-5.75864e-08,0.853507,0.00202549,1.31606e-06,5.21075e-08,0.855533,0.00202828,1.47238e-06,-3.16344e-08,0.857563,0.00203113,1.37748e-06,1.48257e-08,0.859596,0.00203393,1.42196e-06,-2.76684e-08,0.861631,0.00203669,1.33895e-06,3.62433e-08,0.863669,0.00203947,1.44768e-06,1.90463e-09,0.86571,0.00204237,1.45339e-06,-4.38617e-08,0.867754,0.00204515,1.32181e-06,5.43328e-08,0.8698,0.00204796,1.48481e-06,-5.42603e-08,0.87185,0.00205076,1.32203e-06,4.34989e-08,0.873902,0.00205354,1.45252e-06,-5.26029e-10,0.875957,0.00205644,1.45095e-06,-4.13949e-08,0.878015,0.00205922,1.32676e-06,4.68962e-08,0.880075,0.00206201,1.46745e-06,-2.69807e-08,0.882139,0.00206487,1.38651e-06,1.42181e-09,0.884205,0.00206764,1.39077e-06,2.12935e-08,0.886274,0.00207049,1.45465e-06,-2.69912e-08,0.888346,0.00207332,1.37368e-06,2.70664e-08,0.890421,0.00207615,1.45488e-06,-2.16698e-08,0.892498,0.00207899,1.38987e-06,8.14756e-12,0.894579,0.00208177,1.38989e-06,2.16371e-08,0.896662,0.00208462,1.45481e-06,-2.6952e-08,0.898748,0.00208744,1.37395e-06,2.65663e-08,0.900837,0.00209027,1.45365e-06,-1.97084e-08,0.902928,0.00209312,1.39452e-06,-7.33731e-09,0.905023,0.00209589,1.37251e-06,4.90578e-08,0.90712,0.00209878,1.51968e-06,-6.96845e-08,0.90922,0.00210161,1.31063e-06,5.08664e-08,0.911323,0.00210438,1.46323e-06,-1.45717e-08,0.913429,0.00210727,1.41952e-06,7.42038e-09,0.915538,0.00211013,1.44178e-06,-1.51097e-08,0.917649,0.00211297,1.39645e-06,-6.58618e-09,0.919764,0.00211574,1.37669e-06,4.14545e-08,0.921881,0.00211862,1.50105e-06,-4.00222e-08,0.924001,0.0021215,1.38099e-06,-5.7518e-10,0.926124,0.00212426,1.37926e-06,4.23229e-08,0.92825,0.00212714,1.50623e-06,-4.9507e-08,0.930378,0.00213001,1.35771e-06,3.64958e-08,0.93251,0.00213283,1.4672e-06,-3.68713e-08,0.934644,0.00213566,1.35658e-06,5.13848e-08,0.936781,0.00213852,1.51074e-06,-4.94585e-08,0.938921,0.0021414,1.36236e-06,2.72399e-08,0.941064,0.0021442,1.44408e-06,1.0372e-10,0.943209,0.00214709,1.44439e-06,-2.76547e-08,0.945358,0.0021499,1.36143e-06,5.09106e-08,0.947509,0.00215277,1.51416e-06,-5.67784e-08,0.949663,0.00215563,1.34382e-06,5.69935e-08,0.95182,0.00215849,1.5148e-06,-5.19861e-08,0.95398,0.00216136,1.35885e-06,3.17417e-08,0.956143,0.00216418,1.45407e-06,-1.53758e-08,0.958309,0.00216704,1.40794e-06,2.97615e-08,0.960477,0.00216994,1.49723e-06,-4.40657e-08,0.962649,0.00217281,1.36503e-06,2.72919e-08,0.964823,0.00217562,1.44691e-06,-5.49729e-09,0.967,0.0021785,1.43041e-06,-5.30273e-09,0.96918,0.00218134,1.41451e-06,2.67084e-08,0.971363,0.00218425,1.49463e-06,-4.19265e-08,0.973548,0.00218711,1.36885e-06,2.17881e-08,0.975737,0.00218992,1.43422e-06,1.43789e-08,0.977928,0.00219283,1.47735e-06,-1.96989e-08,0.980122,0.00219572,1.41826e-06,4.81221e-09,0.98232,0.00219857,1.43269e-06,4.50048e-10,0.98452,0.00220144,1.43404e-06,-6.61237e-09,0.986722,0.00220429,1.41421e-06,2.59993e-08,0.988928,0.0022072,1.4922e-06,-3.77803e-08,0.991137,0.00221007,1.37886e-06,5.9127e-09,0.993348,0.00221284,1.3966e-06,1.33339e-07,0.995563,0.00221604,1.79662e-06,-5.98872e-07,0.99778,0.00222015,0.,0.}; + + template + __device__ __forceinline__ void RGB2LabConvert_f(const T& src, D& dst) + { + const float _1_3 = 1.0f / 3.0f; + const float _a = 16.0f / 116.0f; + + float B = blueIdx == 0 ? src.x : src.z; + float G = src.y; + float R = blueIdx == 0 ? src.z : src.x; + + if (srgb) + { + B = splineInterpolate(B * GAMMA_TAB_SIZE, c_sRGBGammaTab, GAMMA_TAB_SIZE); + G = splineInterpolate(G * GAMMA_TAB_SIZE, c_sRGBGammaTab, GAMMA_TAB_SIZE); + R = splineInterpolate(R * GAMMA_TAB_SIZE, c_sRGBGammaTab, GAMMA_TAB_SIZE); + } + + float X = B * 0.189828f + G * 0.376219f + R * 0.433953f; + float Y = B * 0.072169f + G * 0.715160f + R * 0.212671f; + float Z = B * 0.872766f + G * 0.109477f + R * 0.017758f; + + float FX = X > 0.008856f ? ::powf(X, _1_3) : (7.787f * X + _a); + float FY = Y > 0.008856f ? ::powf(Y, _1_3) : (7.787f * Y + _a); + float FZ = Z > 0.008856f ? ::powf(Z, _1_3) : (7.787f * Z + _a); + + float L = Y > 0.008856f ? (116.f * FY - 16.f) : (903.3f * Y); + float a = 500.f * (FX - FY); + float b = 200.f * (FY - FZ); + + dst.x = L; + dst.y = a; + dst.z = b; + } + + template struct RGB2Lab; + template + struct RGB2Lab + : unary_function::vec_type, typename TypeVec::vec_type> + { + __device__ __forceinline__ typename TypeVec::vec_type operator ()(const typename TypeVec::vec_type& src) const + { + typename TypeVec::vec_type dst; + + RGB2LabConvert_b(src, dst); + + return dst; + } + __host__ __device__ __forceinline__ RGB2Lab() {} + __host__ __device__ __forceinline__ RGB2Lab(const RGB2Lab&) {} + }; + template + struct RGB2Lab + : unary_function::vec_type, typename TypeVec::vec_type> + { + __device__ __forceinline__ typename TypeVec::vec_type operator ()(const typename TypeVec::vec_type& src) const + { + typename TypeVec::vec_type dst; + + RGB2LabConvert_f(src, dst); + + return dst; + } + __host__ __device__ __forceinline__ RGB2Lab() {} + __host__ __device__ __forceinline__ RGB2Lab(const RGB2Lab&) {} + }; + } + +#define OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(name, scn, dcn, srgb, blueIdx) \ + template struct name ## _traits \ + { \ + typedef ::cv::cuda::device::color_detail::RGB2Lab functor_type; \ + static __host__ __device__ __forceinline__ functor_type create_functor() \ + { \ + return functor_type(); \ + } \ + }; + + namespace color_detail + { + __constant__ float c_sRGBInvGammaTab[] = {0,0.0126255,0.,-8.33961e-06,0.0126172,0.0126005,-2.50188e-05,4.1698e-05,0.0252344,0.0126756,0.000100075,-0.000158451,0.0378516,0.0124004,-0.000375277,-0.000207393,0.0496693,0.0110276,-0.000997456,0.00016837,0.0598678,0.00953783,-0.000492346,2.07235e-05,0.068934,0.00861531,-0.000430176,3.62876e-05,0.0771554,0.00786382,-0.000321313,1.87625e-05,0.0847167,0.00727748,-0.000265025,1.53594e-05,0.0917445,0.00679351,-0.000218947,1.10545e-05,0.0983301,0.00638877,-0.000185784,8.66984e-06,0.104542,0.00604322,-0.000159774,6.82996e-06,0.110432,0.00574416,-0.000139284,5.51008e-06,0.116042,0.00548212,-0.000122754,4.52322e-06,0.121406,0.00525018,-0.000109184,3.75557e-06,0.126551,0.00504308,-9.79177e-05,3.17134e-06,0.131499,0.00485676,-8.84037e-05,2.68469e-06,0.13627,0.004688,-8.03496e-05,2.31725e-06,0.14088,0.00453426,-7.33978e-05,2.00868e-06,0.145343,0.00439349,-6.73718e-05,1.74775e-06,0.149671,0.00426399,-6.21286e-05,1.53547e-06,0.153875,0.00414434,-5.75222e-05,1.364e-06,0.157963,0.00403338,-5.34301e-05,1.20416e-06,0.161944,0.00393014,-4.98177e-05,1.09114e-06,0.165825,0.00383377,-4.65443e-05,9.57987e-07,0.169613,0.00374356,-4.36703e-05,8.88359e-07,0.173314,0.00365888,-4.10052e-05,7.7849e-07,0.176933,0.00357921,-3.86697e-05,7.36254e-07,0.180474,0.00350408,-3.6461e-05,6.42534e-07,0.183942,0.00343308,-3.45334e-05,6.12614e-07,0.187342,0.00336586,-3.26955e-05,5.42894e-07,0.190675,0.00330209,-3.10669e-05,5.08967e-07,0.193947,0.00324149,-2.954e-05,4.75977e-07,0.197159,0.00318383,-2.8112e-05,4.18343e-07,0.200315,0.00312887,-2.6857e-05,4.13651e-07,0.203418,0.00307639,-2.5616e-05,3.70847e-07,0.206469,0.00302627,-2.45035e-05,3.3813e-07,0.209471,0.00297828,-2.34891e-05,3.32999e-07,0.212426,0.0029323,-2.24901e-05,2.96826e-07,0.215336,0.00288821,-2.15996e-05,2.82736e-07,0.218203,0.00284586,-2.07514e-05,2.70961e-07,0.221029,0.00280517,-1.99385e-05,2.42744e-07,0.223814,0.00276602,-1.92103e-05,2.33277e-07,0.226561,0.0027283,-1.85105e-05,2.2486e-07,0.229271,0.00269195,-1.78359e-05,2.08383e-07,0.231945,0.00265691,-1.72108e-05,1.93305e-07,0.234585,0.00262307,-1.66308e-05,1.80687e-07,0.237192,0.00259035,-1.60888e-05,1.86632e-07,0.239766,0.00255873,-1.55289e-05,1.60569e-07,0.24231,0.00252815,-1.50472e-05,1.54566e-07,0.244823,0.00249852,-1.45835e-05,1.59939e-07,0.247307,0.00246983,-1.41037e-05,1.29549e-07,0.249763,0.00244202,-1.3715e-05,1.41429e-07,0.252191,0.00241501,-1.32907e-05,1.39198e-07,0.254593,0.00238885,-1.28731e-05,1.06444e-07,0.256969,0.00236342,-1.25538e-05,1.2048e-07,0.25932,0.00233867,-1.21924e-05,1.26892e-07,0.261647,0.00231467,-1.18117e-05,8.72084e-08,0.26395,0.00229131,-1.15501e-05,1.20323e-07,0.26623,0.00226857,-1.11891e-05,8.71514e-08,0.268487,0.00224645,-1.09276e-05,9.73165e-08,0.270723,0.00222489,-1.06357e-05,8.98259e-08,0.272937,0.00220389,-1.03662e-05,7.98218e-08,0.275131,0.00218339,-1.01267e-05,9.75254e-08,0.277304,0.00216343,-9.83416e-06,6.65195e-08,0.279458,0.00214396,-9.63461e-06,8.34313e-08,0.281592,0.00212494,-9.38431e-06,7.65919e-08,0.283708,0.00210641,-9.15454e-06,5.7236e-08,0.285805,0.00208827,-8.98283e-06,8.18939e-08,0.287885,0.00207055,-8.73715e-06,6.2224e-08,0.289946,0.00205326,-8.55047e-06,5.66388e-08,0.291991,0.00203633,-8.38056e-06,6.88491e-08,0.294019,0.00201978,-8.17401e-06,5.53955e-08,0.296031,0.00200359,-8.00782e-06,6.71971e-08,0.298027,0.00198778,-7.80623e-06,3.34439e-08,0.300007,0.00197227,-7.7059e-06,6.7248e-08,0.301971,0.00195706,-7.50416e-06,5.51915e-08,0.303921,0.00194221,-7.33858e-06,3.98124e-08,0.305856,0.00192766,-7.21915e-06,5.37795e-08,0.307776,0.00191338,-7.05781e-06,4.30919e-08,0.309683,0.00189939,-6.92853e-06,4.20744e-08,0.311575,0.00188566,-6.80231e-06,5.68321e-08,0.313454,0.00187223,-6.63181e-06,2.86195e-08,0.31532,0.00185905,-6.54595e-06,3.73075e-08,0.317172,0.00184607,-6.43403e-06,6.05684e-08,0.319012,0.00183338,-6.25233e-06,1.84426e-08,0.320839,0.00182094,-6.197e-06,4.44757e-08,0.322654,0.00180867,-6.06357e-06,4.20729e-08,0.324456,0.00179667,-5.93735e-06,2.56511e-08,0.326247,0.00178488,-5.8604e-06,3.41368e-08,0.328026,0.00177326,-5.75799e-06,4.64177e-08,0.329794,0.00176188,-5.61874e-06,1.86107e-08,0.33155,0.0017507,-5.5629e-06,2.81511e-08,0.333295,0.00173966,-5.47845e-06,4.75987e-08,0.335029,0.00172884,-5.33565e-06,1.98726e-08,0.336753,0.00171823,-5.27604e-06,2.19226e-08,0.338466,0.00170775,-5.21027e-06,4.14483e-08,0.340169,0.00169745,-5.08592e-06,2.09017e-08,0.341861,0.00168734,-5.02322e-06,2.39561e-08,0.343543,0.00167737,-4.95135e-06,3.22852e-08,0.345216,0.00166756,-4.85449e-06,2.57173e-08,0.346878,0.00165793,-4.77734e-06,1.38569e-08,0.348532,0.00164841,-4.73577e-06,3.80634e-08,0.350175,0.00163906,-4.62158e-06,1.27043e-08,0.35181,0.00162985,-4.58347e-06,3.03279e-08,0.353435,0.00162078,-4.49249e-06,1.49961e-08,0.355051,0.00161184,-4.4475e-06,2.88977e-08,0.356659,0.00160303,-4.3608e-06,1.84241e-08,0.358257,0.00159436,-4.30553e-06,1.6616e-08,0.359848,0.0015858,-4.25568e-06,3.43218e-08,0.361429,0.00157739,-4.15272e-06,-4.89172e-09,0.363002,0.00156907,-4.16739e-06,4.48498e-08,0.364567,0.00156087,-4.03284e-06,4.30676e-09,0.366124,0.00155282,-4.01992e-06,2.73303e-08,0.367673,0.00154486,-3.93793e-06,5.58036e-09,0.369214,0.001537,-3.92119e-06,3.97554e-08,0.370747,0.00152928,-3.80193e-06,-1.55904e-08,0.372272,0.00152163,-3.8487e-06,5.24081e-08,0.37379,0.00151409,-3.69147e-06,-1.52272e-08,0.375301,0.00150666,-3.73715e-06,3.83028e-08,0.376804,0.0014993,-3.62225e-06,1.10278e-08,0.378299,0.00149209,-3.58916e-06,6.99326e-09,0.379788,0.00148493,-3.56818e-06,2.06038e-08,0.381269,0.00147786,-3.50637e-06,2.98009e-08,0.382744,0.00147093,-3.41697e-06,-2.05978e-08,0.384211,0.00146404,-3.47876e-06,5.25899e-08,0.385672,0.00145724,-3.32099e-06,-1.09471e-08,0.387126,0.00145056,-3.35383e-06,2.10009e-08,0.388573,0.00144392,-3.29083e-06,1.63501e-08,0.390014,0.00143739,-3.24178e-06,3.00641e-09,0.391448,0.00143091,-3.23276e-06,3.12282e-08,0.392875,0.00142454,-3.13908e-06,-8.70932e-09,0.394297,0.00141824,-3.16521e-06,3.34114e-08,0.395712,0.00141201,-3.06497e-06,-5.72754e-09,0.397121,0.00140586,-3.08215e-06,1.9301e-08,0.398524,0.00139975,-3.02425e-06,1.7931e-08,0.39992,0.00139376,-2.97046e-06,-1.61822e-09,0.401311,0.00138781,-2.97531e-06,1.83442e-08,0.402696,0.00138192,-2.92028e-06,1.76485e-08,0.404075,0.00137613,-2.86733e-06,4.68617e-10,0.405448,0.00137039,-2.86593e-06,1.02794e-08,0.406816,0.00136469,-2.83509e-06,1.80179e-08,0.408178,0.00135908,-2.78104e-06,7.05594e-09,0.409534,0.00135354,-2.75987e-06,1.33633e-08,0.410885,0.00134806,-2.71978e-06,-9.04568e-10,0.41223,0.00134261,-2.72249e-06,2.0057e-08,0.41357,0.00133723,-2.66232e-06,1.00841e-08,0.414905,0.00133194,-2.63207e-06,-7.88835e-10,0.416234,0.00132667,-2.63444e-06,2.28734e-08,0.417558,0.00132147,-2.56582e-06,-1.29785e-09,0.418877,0.00131633,-2.56971e-06,1.21205e-08,0.420191,0.00131123,-2.53335e-06,1.24202e-08,0.421499,0.0013062,-2.49609e-06,-2.19681e-09,0.422803,0.0013012,-2.50268e-06,2.61696e-08,0.424102,0.00129628,-2.42417e-06,-1.30747e-08,0.425396,0.00129139,-2.46339e-06,2.6129e-08,0.426685,0.00128654,-2.38501e-06,-2.03454e-09,0.427969,0.00128176,-2.39111e-06,1.18115e-08,0.429248,0.00127702,-2.35567e-06,1.43932e-08,0.430523,0.00127235,-2.31249e-06,-9.77965e-09,0.431793,0.00126769,-2.34183e-06,2.47253e-08,0.433058,0.00126308,-2.26766e-06,2.85278e-10,0.434319,0.00125855,-2.2668e-06,3.93614e-09,0.435575,0.00125403,-2.25499e-06,1.37722e-08,0.436827,0.00124956,-2.21368e-06,5.79803e-10,0.438074,0.00124513,-2.21194e-06,1.37112e-08,0.439317,0.00124075,-2.1708e-06,4.17973e-09,0.440556,0.00123642,-2.15826e-06,-6.27703e-10,0.44179,0.0012321,-2.16015e-06,2.81332e-08,0.44302,0.00122787,-2.07575e-06,-2.24985e-08,0.444246,0.00122365,-2.14324e-06,3.20586e-08,0.445467,0.00121946,-2.04707e-06,-1.6329e-08,0.446685,0.00121532,-2.09605e-06,3.32573e-08,0.447898,0.00121122,-1.99628e-06,-2.72927e-08,0.449107,0.00120715,-2.07816e-06,4.6111e-08,0.450312,0.00120313,-1.93983e-06,-3.79416e-08,0.451514,0.00119914,-2.05365e-06,4.60507e-08,0.452711,0.00119517,-1.9155e-06,-2.7052e-08,0.453904,0.00119126,-1.99666e-06,3.23551e-08,0.455093,0.00118736,-1.89959e-06,-1.29613e-08,0.456279,0.00118352,-1.93848e-06,1.94905e-08,0.45746,0.0011797,-1.88e-06,-5.39588e-09,0.458638,0.00117593,-1.89619e-06,2.09282e-09,0.459812,0.00117214,-1.88991e-06,2.68267e-08,0.460982,0.00116844,-1.80943e-06,-1.99925e-08,0.462149,0.00116476,-1.86941e-06,2.3341e-08,0.463312,0.00116109,-1.79939e-06,-1.37674e-08,0.464471,0.00115745,-1.84069e-06,3.17287e-08,0.465627,0.00115387,-1.7455e-06,-2.37407e-08,0.466779,0.00115031,-1.81673e-06,3.34315e-08,0.467927,0.00114677,-1.71643e-06,-2.05786e-08,0.469073,0.00114328,-1.77817e-06,1.90802e-08,0.470214,0.00113978,-1.72093e-06,3.86247e-09,0.471352,0.00113635,-1.70934e-06,-4.72759e-09,0.472487,0.00113292,-1.72352e-06,1.50478e-08,0.473618,0.00112951,-1.67838e-06,4.14108e-09,0.474746,0.00112617,-1.66595e-06,-1.80986e-09,0.47587,0.00112283,-1.67138e-06,3.09816e-09,0.476991,0.0011195,-1.66209e-06,1.92198e-08,0.478109,0.00111623,-1.60443e-06,-2.03726e-08,0.479224,0.00111296,-1.66555e-06,3.2468e-08,0.480335,0.00110973,-1.56814e-06,-2.00922e-08,0.481443,0.00110653,-1.62842e-06,1.80983e-08,0.482548,0.00110333,-1.57413e-06,7.30362e-09,0.48365,0.0011002,-1.55221e-06,-1.75107e-08,0.484749,0.00109705,-1.60475e-06,3.29373e-08,0.485844,0.00109393,-1.50594e-06,-2.48315e-08,0.486937,0.00109085,-1.58043e-06,3.65865e-08,0.488026,0.0010878,-1.47067e-06,-3.21078e-08,0.489112,0.00108476,-1.56699e-06,3.22397e-08,0.490195,0.00108172,-1.47027e-06,-7.44391e-09,0.491276,0.00107876,-1.49261e-06,-2.46428e-09,0.492353,0.00107577,-1.5e-06,1.73011e-08,0.493427,0.00107282,-1.4481e-06,-7.13552e-09,0.494499,0.0010699,-1.4695e-06,1.1241e-08,0.495567,0.001067,-1.43578e-06,-8.02637e-09,0.496633,0.0010641,-1.45986e-06,2.08645e-08,0.497695,0.00106124,-1.39726e-06,-1.58271e-08,0.498755,0.0010584,-1.44475e-06,1.26415e-08,0.499812,0.00105555,-1.40682e-06,2.48655e-08,0.500866,0.00105281,-1.33222e-06,-5.24988e-08,0.501918,0.00104999,-1.48972e-06,6.59206e-08,0.502966,0.00104721,-1.29196e-06,-3.237e-08,0.504012,0.00104453,-1.38907e-06,3.95479e-09,0.505055,0.00104176,-1.3772e-06,1.65509e-08,0.506096,0.00103905,-1.32755e-06,-1.05539e-08,0.507133,0.00103637,-1.35921e-06,2.56648e-08,0.508168,0.00103373,-1.28222e-06,-3.25007e-08,0.509201,0.00103106,-1.37972e-06,4.47336e-08,0.51023,0.00102844,-1.24552e-06,-2.72245e-08,0.511258,0.00102587,-1.32719e-06,4.55952e-09,0.512282,0.00102323,-1.31352e-06,8.98645e-09,0.513304,0.00102063,-1.28656e-06,1.90992e-08,0.514323,0.00101811,-1.22926e-06,-2.57786e-08,0.51534,0.00101557,-1.30659e-06,2.44104e-08,0.516355,0.00101303,-1.23336e-06,-1.22581e-08,0.517366,0.00101053,-1.27014e-06,2.4622e-08,0.518376,0.00100806,-1.19627e-06,-2.66253e-08,0.519383,0.00100559,-1.27615e-06,2.22744e-08,0.520387,0.00100311,-1.20932e-06,-2.8679e-09,0.521389,0.00100068,-1.21793e-06,-1.08029e-08,0.522388,0.000998211,-1.25034e-06,4.60795e-08,0.523385,0.000995849,-1.1121e-06,-5.4306e-08,0.52438,0.000993462,-1.27502e-06,5.19354e-08,0.525372,0.000991067,-1.11921e-06,-3.42262e-08,0.526362,0.000988726,-1.22189e-06,2.53646e-08,0.52735,0.000986359,-1.14579e-06,-7.62782e-09,0.528335,0.000984044,-1.16868e-06,5.14668e-09,0.529318,0.000981722,-1.15324e-06,-1.29589e-08,0.530298,0.000979377,-1.19211e-06,4.66888e-08,0.531276,0.000977133,-1.05205e-06,-5.45868e-08,0.532252,0.000974865,-1.21581e-06,5.24495e-08,0.533226,0.000972591,-1.05846e-06,-3.60019e-08,0.534198,0.000970366,-1.16647e-06,3.19537e-08,0.535167,0.000968129,-1.07061e-06,-3.2208e-08,0.536134,0.000965891,-1.16723e-06,3.72738e-08,0.537099,0.000963668,-1.05541e-06,2.32205e-09,0.538061,0.000961564,-1.04844e-06,-4.65618e-08,0.539022,0.000959328,-1.18813e-06,6.47159e-08,0.53998,0.000957146,-9.93979e-07,-3.3488e-08,0.540936,0.000955057,-1.09444e-06,9.63166e-09,0.54189,0.000952897,-1.06555e-06,-5.03871e-09,0.542842,0.000950751,-1.08066e-06,1.05232e-08,0.543792,0.000948621,-1.04909e-06,2.25503e-08,0.544739,0.000946591,-9.81444e-07,-4.11195e-08,0.545685,0.000944504,-1.1048e-06,2.27182e-08,0.546628,0.000942363,-1.03665e-06,9.85146e-09,0.54757,0.000940319,-1.00709e-06,-2.51938e-09,0.548509,0.000938297,-1.01465e-06,2.25858e-10,0.549446,0.000936269,-1.01397e-06,1.61598e-09,0.550381,0.000934246,-1.00913e-06,-6.68983e-09,0.551315,0.000932207,-1.0292e-06,2.51434e-08,0.552246,0.000930224,-9.53765e-07,-3.42793e-08,0.553175,0.000928214,-1.0566e-06,5.23688e-08,0.554102,0.000926258,-8.99497e-07,-5.59865e-08,0.555028,0.000924291,-1.06746e-06,5.23679e-08,0.555951,0.000922313,-9.10352e-07,-3.42763e-08,0.556872,0.00092039,-1.01318e-06,2.51326e-08,0.557792,0.000918439,-9.37783e-07,-6.64954e-09,0.558709,0.000916543,-9.57732e-07,1.46554e-09,0.559625,0.000914632,-9.53335e-07,7.87281e-10,0.560538,0.000912728,-9.50973e-07,-4.61466e-09,0.56145,0.000910812,-9.64817e-07,1.76713e-08,0.56236,0.000908935,-9.11804e-07,-6.46564e-09,0.563268,0.000907092,-9.312e-07,8.19121e-09,0.564174,0.000905255,-9.06627e-07,-2.62992e-08,0.565078,0.000903362,-9.85524e-07,3.74007e-08,0.565981,0.000901504,-8.73322e-07,-4.0942e-09,0.566882,0.000899745,-8.85605e-07,-2.1024e-08,0.56778,0.00089791,-9.48677e-07,2.85854e-08,0.568677,0.000896099,-8.62921e-07,-3.3713e-08,0.569573,0.000894272,-9.64059e-07,4.6662e-08,0.570466,0.000892484,-8.24073e-07,-3.37258e-08,0.571358,0.000890734,-9.25251e-07,2.86365e-08,0.572247,0.00088897,-8.39341e-07,-2.12155e-08,0.573135,0.000887227,-9.02988e-07,-3.37913e-09,0.574022,0.000885411,-9.13125e-07,3.47319e-08,0.574906,0.000883689,-8.08929e-07,-1.63394e-08,0.575789,0.000882022,-8.57947e-07,-2.8979e-08,0.57667,0.00088022,-9.44885e-07,7.26509e-08,0.57755,0.000878548,-7.26932e-07,-8.28106e-08,0.578427,0.000876845,-9.75364e-07,7.97774e-08,0.579303,0.000875134,-7.36032e-07,-5.74849e-08,0.580178,0.00087349,-9.08486e-07,3.09529e-08,0.58105,0.000871765,-8.15628e-07,-6.72206e-09,0.581921,0.000870114,-8.35794e-07,-4.06451e-09,0.582791,0.00086843,-8.47987e-07,2.29799e-08,0.583658,0.000866803,-7.79048e-07,-2.82503e-08,0.584524,0.00086516,-8.63799e-07,3.04167e-08,0.585388,0.000863524,-7.72548e-07,-3.38119e-08,0.586251,0.000861877,-8.73984e-07,4.52264e-08,0.587112,0.000860265,-7.38305e-07,-2.78842e-08,0.587972,0.000858705,-8.21958e-07,6.70567e-09,0.58883,0.000857081,-8.01841e-07,1.06161e-09,0.589686,0.000855481,-7.98656e-07,-1.09521e-08,0.590541,0.00085385,-8.31512e-07,4.27468e-08,0.591394,0.000852316,-7.03272e-07,-4.08257e-08,0.592245,0.000850787,-8.25749e-07,1.34677e-09,0.593095,0.000849139,-8.21709e-07,3.54387e-08,0.593944,0.000847602,-7.15393e-07,-2.38924e-08,0.59479,0.0008461,-7.8707e-07,5.26143e-10,0.595636,0.000844527,-7.85491e-07,2.17879e-08,0.596479,0.000843021,-7.20127e-07,-2.80733e-08,0.597322,0.000841497,-8.04347e-07,3.09005e-08,0.598162,0.000839981,-7.11646e-07,-3.5924e-08,0.599002,0.00083845,-8.19418e-07,5.3191e-08,0.599839,0.000836971,-6.59845e-07,-5.76307e-08,0.600676,0.000835478,-8.32737e-07,5.81227e-08,0.60151,0.000833987,-6.58369e-07,-5.56507e-08,0.602344,0.000832503,-8.25321e-07,4.52706e-08,0.603175,0.000830988,-6.89509e-07,-6.22236e-09,0.604006,0.000829591,-7.08176e-07,-2.03811e-08,0.604834,0.000828113,-7.6932e-07,2.8142e-08,0.605662,0.000826659,-6.84894e-07,-3.25822e-08,0.606488,0.000825191,-7.8264e-07,4.25823e-08,0.607312,0.000823754,-6.54893e-07,-1.85376e-08,0.608135,0.000822389,-7.10506e-07,-2.80365e-08,0.608957,0.000820883,-7.94616e-07,7.1079e-08,0.609777,0.000819507,-5.81379e-07,-7.74655e-08,0.610596,0.000818112,-8.13775e-07,5.9969e-08,0.611413,0.000816665,-6.33868e-07,-4.32013e-08,0.612229,0.000815267,-7.63472e-07,5.32313e-08,0.613044,0.0008139,-6.03778e-07,-5.05148e-08,0.613857,0.000812541,-7.55323e-07,2.96187e-08,0.614669,0.000811119,-6.66466e-07,-8.35545e-09,0.615479,0.000809761,-6.91533e-07,3.80301e-09,0.616288,0.00080839,-6.80124e-07,-6.85666e-09,0.617096,0.000807009,-7.00694e-07,2.36237e-08,0.617903,0.000805678,-6.29822e-07,-2.80336e-08,0.618708,0.000804334,-7.13923e-07,2.8906e-08,0.619511,0.000802993,-6.27205e-07,-2.79859e-08,0.620314,0.000801655,-7.11163e-07,2.34329e-08,0.621114,0.000800303,-6.40864e-07,-6.14108e-09,0.621914,0.000799003,-6.59287e-07,1.13151e-09,0.622712,0.000797688,-6.55893e-07,1.61507e-09,0.62351,0.000796381,-6.51048e-07,-7.59186e-09,0.624305,0.000795056,-6.73823e-07,2.87524e-08,0.6251,0.000793794,-5.87566e-07,-4.7813e-08,0.625893,0.000792476,-7.31005e-07,4.32901e-08,0.626685,0.000791144,-6.01135e-07,-6.13814e-09,0.627475,0.000789923,-6.19549e-07,-1.87376e-08,0.628264,0.000788628,-6.75762e-07,2.14837e-08,0.629052,0.000787341,-6.11311e-07,-7.59265e-09,0.629839,0.000786095,-6.34089e-07,8.88692e-09,0.630625,0.000784854,-6.07428e-07,-2.7955e-08,0.631409,0.000783555,-6.91293e-07,4.33285e-08,0.632192,0.000782302,-5.61307e-07,-2.61497e-08,0.632973,0.000781101,-6.39757e-07,1.6658e-09,0.633754,0.000779827,-6.34759e-07,1.94866e-08,0.634533,0.000778616,-5.76299e-07,-2.00076e-08,0.635311,0.000777403,-6.36322e-07,9.39091e-10,0.636088,0.000776133,-6.33505e-07,1.62512e-08,0.636863,0.000774915,-5.84751e-07,-6.33937e-09,0.637638,0.000773726,-6.03769e-07,9.10609e-09,0.638411,0.000772546,-5.76451e-07,-3.00849e-08,0.639183,0.000771303,-6.66706e-07,5.1629e-08,0.639953,0.000770125,-5.11819e-07,-5.7222e-08,0.640723,0.000768929,-6.83485e-07,5.80497e-08,0.641491,0.000767736,-5.09336e-07,-5.57674e-08,0.642259,0.000766551,-6.76638e-07,4.58105e-08,0.643024,0.000765335,-5.39206e-07,-8.26541e-09,0.643789,0.000764231,-5.64002e-07,-1.27488e-08,0.644553,0.000763065,-6.02249e-07,-3.44168e-10,0.645315,0.00076186,-6.03281e-07,1.41254e-08,0.646077,0.000760695,-5.60905e-07,3.44727e-09,0.646837,0.000759584,-5.50563e-07,-2.79144e-08,0.647596,0.000758399,-6.34307e-07,4.86057e-08,0.648354,0.000757276,-4.88489e-07,-4.72989e-08,0.64911,0.000756158,-6.30386e-07,2.13807e-08,0.649866,0.000754961,-5.66244e-07,2.13808e-08,0.65062,0.000753893,-5.02102e-07,-4.7299e-08,0.651374,0.000752746,-6.43999e-07,4.86059e-08,0.652126,0.000751604,-4.98181e-07,-2.79154e-08,0.652877,0.000750524,-5.81927e-07,3.45089e-09,0.653627,0.000749371,-5.71575e-07,1.41119e-08,0.654376,0.00074827,-5.29239e-07,-2.93748e-10,0.655123,0.00074721,-5.3012e-07,-1.29368e-08,0.65587,0.000746111,-5.68931e-07,-7.56355e-09,0.656616,0.000744951,-5.91621e-07,4.3191e-08,0.65736,0.000743897,-4.62048e-07,-4.59911e-08,0.658103,0.000742835,-6.00022e-07,2.15642e-08,0.658846,0.0007417,-5.35329e-07,1.93389e-08,0.659587,0.000740687,-4.77312e-07,-3.93152e-08,0.660327,0.000739615,-5.95258e-07,1.87126e-08,0.661066,0.00073848,-5.3912e-07,2.40695e-08,0.661804,0.000737474,-4.66912e-07,-5.53859e-08,0.662541,0.000736374,-6.33069e-07,7.82648e-08,0.663277,0.000735343,-3.98275e-07,-7.88593e-08,0.664012,0.00073431,-6.34853e-07,5.83585e-08,0.664745,0.000733215,-4.59777e-07,-3.53656e-08,0.665478,0.000732189,-5.65874e-07,2.34994e-08,0.66621,0.000731128,-4.95376e-07,9.72743e-10,0.66694,0.00073014,-4.92458e-07,-2.73903e-08,0.66767,0.000729073,-5.74629e-07,4.89839e-08,0.668398,0.000728071,-4.27677e-07,-4.93359e-08,0.669126,0.000727068,-5.75685e-07,2.91504e-08,0.669853,0.000726004,-4.88234e-07,-7.66109e-09,0.670578,0.000725004,-5.11217e-07,1.49392e-09,0.671303,0.000723986,-5.06735e-07,1.68533e-09,0.672026,0.000722978,-5.01679e-07,-8.23525e-09,0.672749,0.00072195,-5.26385e-07,3.12556e-08,0.67347,0.000720991,-4.32618e-07,-5.71825e-08,0.674191,0.000719954,-6.04166e-07,7.8265e-08,0.67491,0.00071898,-3.69371e-07,-7.70634e-08,0.675628,0.00071801,-6.00561e-07,5.11747e-08,0.676346,0.000716963,-4.47037e-07,-8.42615e-09,0.677062,0.000716044,-4.72315e-07,-1.747e-08,0.677778,0.000715046,-5.24725e-07,1.87015e-08,0.678493,0.000714053,-4.68621e-07,2.26856e-09,0.679206,0.000713123,-4.61815e-07,-2.77758e-08,0.679919,0.000712116,-5.45142e-07,4.92298e-08,0.68063,0.000711173,-3.97453e-07,-4.99339e-08,0.681341,0.000710228,-5.47255e-07,3.12967e-08,0.682051,0.000709228,-4.53365e-07,-1.56481e-08,0.68276,0.000708274,-5.00309e-07,3.12958e-08,0.683467,0.000707367,-4.06422e-07,-4.99303e-08,0.684174,0.000706405,-5.56213e-07,4.9216e-08,0.68488,0.00070544,-4.08565e-07,-2.77245e-08,0.685585,0.00070454,-4.91738e-07,2.07748e-09,0.686289,0.000703562,-4.85506e-07,1.94146e-08,0.686992,0.00070265,-4.27262e-07,-2.01314e-08,0.687695,0.000701735,-4.87656e-07,1.50616e-09,0.688396,0.000700764,-4.83137e-07,1.41067e-08,0.689096,0.00069984,-4.40817e-07,1.67168e-09,0.689795,0.000698963,-4.35802e-07,-2.07934e-08,0.690494,0.000698029,-4.98182e-07,2.18972e-08,0.691192,0.000697099,-4.32491e-07,-7.19092e-09,0.691888,0.000696212,-4.54064e-07,6.86642e-09,0.692584,0.000695325,-4.33464e-07,-2.02747e-08,0.693279,0.000694397,-4.94288e-07,1.46279e-08,0.693973,0.000693452,-4.50405e-07,2.13678e-08,0.694666,0.000692616,-3.86301e-07,-4.04945e-08,0.695358,0.000691721,-5.07785e-07,2.14009e-08,0.696049,0.00069077,-4.43582e-07,1.44955e-08,0.69674,0.000689926,-4.00096e-07,-1.97783e-08,0.697429,0.000689067,-4.5943e-07,5.01296e-09,0.698118,0.000688163,-4.44392e-07,-2.73521e-10,0.698805,0.000687273,-4.45212e-07,-3.91893e-09,0.699492,0.000686371,-4.56969e-07,1.59493e-08,0.700178,0.000685505,-4.09121e-07,-2.73351e-10,0.700863,0.000684686,-4.09941e-07,-1.4856e-08,0.701548,0.000683822,-4.54509e-07,9.25979e-11,0.702231,0.000682913,-4.54231e-07,1.44855e-08,0.702913,0.000682048,-4.10775e-07,1.56992e-09,0.703595,0.000681231,-4.06065e-07,-2.07652e-08,0.704276,0.000680357,-4.68361e-07,2.18864e-08,0.704956,0.000679486,-4.02701e-07,-7.17595e-09,0.705635,0.000678659,-4.24229e-07,6.81748e-09,0.706313,0.000677831,-4.03777e-07,-2.0094e-08,0.70699,0.000676963,-4.64059e-07,1.39538e-08,0.707667,0.000676077,-4.22197e-07,2.38835e-08,0.708343,0.000675304,-3.50547e-07,-4.98831e-08,0.709018,0.000674453,-5.00196e-07,5.64395e-08,0.709692,0.000673622,-3.30878e-07,-5.66657e-08,0.710365,0.00067279,-5.00875e-07,5.1014e-08,0.711037,0.000671942,-3.47833e-07,-2.81809e-08,0.711709,0.000671161,-4.32376e-07,2.10513e-09,0.712379,0.000670303,-4.2606e-07,1.97604e-08,0.713049,0.00066951,-3.66779e-07,-2.15422e-08,0.713718,0.000668712,-4.31406e-07,6.8038e-09,0.714387,0.000667869,-4.10994e-07,-5.67295e-09,0.715054,0.00066703,-4.28013e-07,1.5888e-08,0.715721,0.000666222,-3.80349e-07,1.72576e-09,0.716387,0.000665467,-3.75172e-07,-2.27911e-08,0.717052,0.000664648,-4.43545e-07,2.9834e-08,0.717716,0.00066385,-3.54043e-07,-3.69401e-08,0.718379,0.000663031,-4.64864e-07,5.83219e-08,0.719042,0.000662277,-2.89898e-07,-7.71382e-08,0.719704,0.000661465,-5.21313e-07,7.14171e-08,0.720365,0.000660637,-3.07061e-07,-2.97161e-08,0.721025,0.000659934,-3.96209e-07,-1.21575e-08,0.721685,0.000659105,-4.32682e-07,1.87412e-08,0.722343,0.000658296,-3.76458e-07,-3.2029e-09,0.723001,0.000657533,-3.86067e-07,-5.9296e-09,0.723659,0.000656743,-4.03856e-07,2.69213e-08,0.724315,0.000656016,-3.23092e-07,-4.21511e-08,0.724971,0.000655244,-4.49545e-07,2.24737e-08,0.725625,0.000654412,-3.82124e-07,1.18611e-08,0.726279,0.000653683,-3.46541e-07,-1.03132e-08,0.726933,0.000652959,-3.7748e-07,-3.02128e-08,0.727585,0.000652114,-4.68119e-07,7.15597e-08,0.728237,0.000651392,-2.5344e-07,-7.72119e-08,0.728888,0.000650654,-4.85075e-07,5.8474e-08,0.729538,0.000649859,-3.09654e-07,-3.74746e-08,0.730188,0.000649127,-4.22077e-07,3.18197e-08,0.730837,0.000648379,-3.26618e-07,-3.01997e-08,0.731485,0.000647635,-4.17217e-07,2.93747e-08,0.732132,0.000646888,-3.29093e-07,-2.76943e-08,0.732778,0.000646147,-4.12176e-07,2.17979e-08,0.733424,0.000645388,-3.46783e-07,1.07292e-10,0.734069,0.000644695,-3.46461e-07,-2.22271e-08,0.734713,0.000643935,-4.13142e-07,2.91963e-08,0.735357,0.000643197,-3.25553e-07,-3.49536e-08,0.736,0.000642441,-4.30414e-07,5.10133e-08,0.736642,0.000641733,-2.77374e-07,-4.98904e-08,0.737283,0.000641028,-4.27045e-07,2.93392e-08,0.737924,0.000640262,-3.39028e-07,-7.86156e-09,0.738564,0.000639561,-3.62612e-07,2.10703e-09,0.739203,0.000638842,-3.56291e-07,-5.6653e-10,0.739842,0.000638128,-3.57991e-07,1.59086e-10,0.740479,0.000637412,-3.57513e-07,-6.98321e-11,0.741116,0.000636697,-3.57723e-07,1.20214e-10,0.741753,0.000635982,-3.57362e-07,-4.10987e-10,0.742388,0.000635266,-3.58595e-07,1.5237e-09,0.743023,0.000634553,-3.54024e-07,-5.68376e-09,0.743657,0.000633828,-3.71075e-07,2.12113e-08,0.744291,0.00063315,-3.07441e-07,-1.95569e-08,0.744924,0.000632476,-3.66112e-07,-2.58816e-09,0.745556,0.000631736,-3.73877e-07,2.99096e-08,0.746187,0.000631078,-2.84148e-07,-5.74454e-08,0.746818,0.000630337,-4.56484e-07,8.06629e-08,0.747448,0.000629666,-2.14496e-07,-8.63922e-08,0.748077,0.000628978,-4.73672e-07,8.60918e-08,0.748706,0.000628289,-2.15397e-07,-7.91613e-08,0.749334,0.000627621,-4.5288e-07,5.17393e-08,0.749961,0.00062687,-2.97663e-07,-8.58662e-09,0.750588,0.000626249,-3.23422e-07,-1.73928e-08,0.751214,0.00062555,-3.75601e-07,1.85532e-08,0.751839,0.000624855,-3.19941e-07,2.78479e-09,0.752463,0.000624223,-3.11587e-07,-2.96923e-08,0.753087,0.000623511,-4.00664e-07,5.63799e-08,0.75371,0.000622879,-2.31524e-07,-7.66179e-08,0.754333,0.000622186,-4.61378e-07,7.12778e-08,0.754955,0.000621477,-2.47545e-07,-2.96794e-08,0.755576,0.000620893,-3.36583e-07,-1.21648e-08,0.756196,0.000620183,-3.73077e-07,1.87339e-08,0.756816,0.000619493,-3.16875e-07,-3.16622e-09,0.757435,0.00061885,-3.26374e-07,-6.0691e-09,0.758054,0.000618179,-3.44581e-07,2.74426e-08,0.758672,0.000617572,-2.62254e-07,-4.40968e-08,0.759289,0.000616915,-3.94544e-07,2.97352e-08,0.759906,0.000616215,-3.05338e-07,-1.52393e-08,0.760522,0.000615559,-3.51056e-07,3.12221e-08,0.761137,0.000614951,-2.5739e-07,-5.00443e-08,0.761751,0.000614286,-4.07523e-07,4.9746e-08,0.762365,0.00061362,-2.58285e-07,-2.97303e-08,0.762979,0.000613014,-3.47476e-07,9.57079e-09,0.763591,0.000612348,-3.18764e-07,-8.55287e-09,0.764203,0.000611685,-3.44422e-07,2.46407e-08,0.764815,0.00061107,-2.705e-07,-3.04053e-08,0.765426,0.000610437,-3.61716e-07,3.73759e-08,0.766036,0.000609826,-2.49589e-07,-5.94935e-08,0.766645,0.000609149,-4.28069e-07,8.13889e-08,0.767254,0.000608537,-1.83902e-07,-8.72483e-08,0.767862,0.000607907,-4.45647e-07,8.87901e-08,0.76847,0.000607282,-1.79277e-07,-8.90983e-08,0.769077,0.000606656,-4.46572e-07,8.87892e-08,0.769683,0.000606029,-1.80204e-07,-8.72446e-08,0.770289,0.000605407,-4.41938e-07,8.13752e-08,0.770894,0.000604768,-1.97812e-07,-5.94423e-08,0.771498,0.000604194,-3.76139e-07,3.71848e-08,0.772102,0.000603553,-2.64585e-07,-2.96922e-08,0.772705,0.000602935,-3.53661e-07,2.19793e-08,0.773308,0.000602293,-2.87723e-07,1.37955e-09,0.77391,0.000601722,-2.83585e-07,-2.74976e-08,0.774512,0.000601072,-3.66077e-07,4.9006e-08,0.775112,0.000600487,-2.19059e-07,-4.93171e-08,0.775712,0.000599901,-3.67011e-07,2.90531e-08,0.776312,0.000599254,-2.79851e-07,-7.29081e-09,0.776911,0.000598673,-3.01724e-07,1.10077e-10,0.777509,0.00059807,-3.01393e-07,6.85053e-09,0.778107,0.000597487,-2.80842e-07,-2.75123e-08,0.778704,0.000596843,-3.63379e-07,4.35939e-08,0.779301,0.000596247,-2.32597e-07,-2.7654e-08,0.779897,0.000595699,-3.15559e-07,7.41741e-09,0.780492,0.00059509,-2.93307e-07,-2.01562e-09,0.781087,0.000594497,-2.99354e-07,6.45059e-10,0.781681,0.000593901,-2.97418e-07,-5.64635e-10,0.782275,0.000593304,-2.99112e-07,1.61347e-09,0.782868,0.000592711,-2.94272e-07,-5.88926e-09,0.78346,0.000592105,-3.1194e-07,2.19436e-08,0.784052,0.000591546,-2.46109e-07,-2.22805e-08,0.784643,0.000590987,-3.1295e-07,7.57368e-09,0.785234,0.000590384,-2.90229e-07,-8.01428e-09,0.785824,0.00058978,-3.14272e-07,2.44834e-08,0.786414,0.000589225,-2.40822e-07,-3.03148e-08,0.787003,0.000588652,-3.31766e-07,3.7171e-08,0.787591,0.0005881,-2.20253e-07,-5.87646e-08,0.788179,0.000587483,-3.96547e-07,7.86782e-08,0.788766,0.000586926,-1.60512e-07,-7.71342e-08,0.789353,0.000586374,-3.91915e-07,5.10444e-08,0.789939,0.000585743,-2.38782e-07,-7.83422e-09,0.790524,0.000585242,-2.62284e-07,-1.97076e-08,0.791109,0.000584658,-3.21407e-07,2.70598e-08,0.791693,0.000584097,-2.40228e-07,-2.89269e-08,0.792277,0.000583529,-3.27008e-07,2.90431e-08,0.792861,0.000582963,-2.39879e-07,-2.76409e-08,0.793443,0.0005824,-3.22802e-07,2.1916e-08,0.794025,0.00058182,-2.57054e-07,-4.18368e-10,0.794607,0.000581305,-2.58309e-07,-2.02425e-08,0.795188,0.000580727,-3.19036e-07,2.17838e-08,0.795768,0.000580155,-2.53685e-07,-7.28814e-09,0.796348,0.000579625,-2.75549e-07,7.36871e-09,0.796928,0.000579096,-2.53443e-07,-2.21867e-08,0.797506,0.000578523,-3.20003e-07,2.17736e-08,0.798085,0.000577948,-2.54683e-07,-5.30296e-09,0.798662,0.000577423,-2.70592e-07,-5.61698e-10,0.799239,0.00057688,-2.72277e-07,7.54977e-09,0.799816,0.000576358,-2.49627e-07,-2.96374e-08,0.800392,0.00057577,-3.38539e-07,5.1395e-08,0.800968,0.000575247,-1.84354e-07,-5.67335e-08,0.801543,0.000574708,-3.54555e-07,5.63297e-08,0.802117,0.000574168,-1.85566e-07,-4.93759e-08,0.802691,0.000573649,-3.33693e-07,2.19646e-08,0.803264,0.000573047,-2.678e-07,2.1122e-08,0.803837,0.000572575,-2.04433e-07,-4.68482e-08,0.804409,0.000572026,-3.44978e-07,4.70613e-08,0.804981,0.000571477,-2.03794e-07,-2.21877e-08,0.805552,0.000571003,-2.70357e-07,-1.79153e-08,0.806123,0.000570408,-3.24103e-07,3.42443e-08,0.806693,0.000569863,-2.2137e-07,1.47556e-10,0.807263,0.000569421,-2.20928e-07,-3.48345e-08,0.807832,0.000568874,-3.25431e-07,1.99812e-08,0.808401,0.000568283,-2.65487e-07,1.45143e-08,0.808969,0.000567796,-2.21945e-07,-1.84338e-08,0.809536,0.000567297,-2.77246e-07,-3.83608e-10,0.810103,0.000566741,-2.78397e-07,1.99683e-08,0.81067,0.000566244,-2.18492e-07,-1.98848e-08,0.811236,0.000565747,-2.78146e-07,-3.38976e-11,0.811801,0.000565191,-2.78248e-07,2.00204e-08,0.812366,0.000564695,-2.18187e-07,-2.04429e-08,0.812931,0.000564197,-2.79516e-07,2.1467e-09,0.813495,0.000563644,-2.73076e-07,1.18561e-08,0.814058,0.000563134,-2.37507e-07,1.00334e-08,0.814621,0.000562689,-2.07407e-07,-5.19898e-08,0.815183,0.000562118,-3.63376e-07,7.87163e-08,0.815745,0.000561627,-1.27227e-07,-8.40616e-08,0.816306,0.000561121,-3.79412e-07,7.87163e-08,0.816867,0.000560598,-1.43263e-07,-5.19898e-08,0.817428,0.000560156,-2.99233e-07,1.00335e-08,0.817988,0.000559587,-2.69132e-07,1.18559e-08,0.818547,0.000559085,-2.33564e-07,2.14764e-09,0.819106,0.000558624,-2.27122e-07,-2.04464e-08,0.819664,0.000558108,-2.88461e-07,2.00334e-08,0.820222,0.000557591,-2.28361e-07,-8.24277e-11,0.820779,0.000557135,-2.28608e-07,-1.97037e-08,0.821336,0.000556618,-2.87719e-07,1.92925e-08,0.821893,0.000556101,-2.29841e-07,2.13831e-09,0.822448,0.000555647,-2.23427e-07,-2.78458e-08,0.823004,0.000555117,-3.06964e-07,4.96402e-08,0.823559,0.000554652,-1.58043e-07,-5.15058e-08,0.824113,0.000554181,-3.12561e-07,3.71737e-08,0.824667,0.000553668,-2.0104e-07,-3.75844e-08,0.82522,0.000553153,-3.13793e-07,5.35592e-08,0.825773,0.000552686,-1.53115e-07,-5.74431e-08,0.826326,0.000552207,-3.25444e-07,5.7004e-08,0.826878,0.000551728,-1.54433e-07,-5.13635e-08,0.827429,0.000551265,-3.08523e-07,2.92406e-08,0.82798,0.000550735,-2.20801e-07,-5.99424e-09,0.828531,0.000550276,-2.38784e-07,-5.26363e-09,0.829081,0.000549782,-2.54575e-07,2.70488e-08,0.82963,0.000549354,-1.73429e-07,-4.33268e-08,0.83018,0.000548878,-3.03409e-07,2.7049e-08,0.830728,0.000548352,-2.22262e-07,-5.26461e-09,0.831276,0.000547892,-2.38056e-07,-5.99057e-09,0.831824,0.000547397,-2.56027e-07,2.92269e-08,0.832371,0.000546973,-1.68347e-07,-5.13125e-08,0.832918,0.000546482,-3.22284e-07,5.68139e-08,0.833464,0.000546008,-1.51843e-07,-5.67336e-08,0.83401,0.000545534,-3.22043e-07,5.09113e-08,0.834555,0.000545043,-1.6931e-07,-2.77022e-08,0.8351,0.000544621,-2.52416e-07,2.92924e-10,0.835644,0.000544117,-2.51537e-07,2.65305e-08,0.836188,0.000543694,-1.71946e-07,-4.68105e-08,0.836732,0.00054321,-3.12377e-07,4.15021e-08,0.837275,0.000542709,-1.87871e-07,1.13355e-11,0.837817,0.000542334,-1.87837e-07,-4.15474e-08,0.838359,0.000541833,-3.12479e-07,4.69691e-08,0.838901,0.000541349,-1.71572e-07,-2.71196e-08,0.839442,0.000540925,-2.52931e-07,1.90462e-09,0.839983,0.000540425,-2.47217e-07,1.95011e-08,0.840523,0.000539989,-1.88713e-07,-2.03045e-08,0.841063,0.00053955,-2.49627e-07,2.11216e-09,0.841602,0.000539057,-2.4329e-07,1.18558e-08,0.842141,0.000538606,-2.07723e-07,1.00691e-08,0.842679,0.000538221,-1.77516e-07,-5.21324e-08,0.843217,0.00053771,-3.33913e-07,7.92513e-08,0.843755,0.00053728,-9.6159e-08,-8.60587e-08,0.844292,0.000536829,-3.54335e-07,8.61696e-08,0.844828,0.000536379,-9.58263e-08,-7.98057e-08,0.845364,0.000535948,-3.35243e-07,5.42394e-08,0.8459,0.00053544,-1.72525e-07,-1.79426e-08,0.846435,0.000535041,-2.26353e-07,1.75308e-08,0.84697,0.000534641,-1.73761e-07,-5.21806e-08,0.847505,0.000534137,-3.30302e-07,7.19824e-08,0.848038,0.000533692,-1.14355e-07,-5.69349e-08,0.848572,0.000533293,-2.8516e-07,3.65479e-08,0.849105,0.000532832,-1.75516e-07,-2.96519e-08,0.849638,0.000532392,-2.64472e-07,2.2455e-08,0.85017,0.000531931,-1.97107e-07,-5.63451e-10,0.850702,0.000531535,-1.98797e-07,-2.02011e-08,0.851233,0.000531077,-2.59401e-07,2.17634e-08,0.851764,0.000530623,-1.94111e-07,-7.24794e-09,0.852294,0.000530213,-2.15854e-07,7.22832e-09,0.852824,0.000529803,-1.94169e-07,-2.16653e-08,0.853354,0.00052935,-2.59165e-07,1.98283e-08,0.853883,0.000528891,-1.9968e-07,1.95678e-09,0.854412,0.000528497,-1.9381e-07,-2.76554e-08,0.85494,0.000528027,-2.76776e-07,4.90603e-08,0.855468,0.00052762,-1.29596e-07,-4.93764e-08,0.855995,0.000527213,-2.77725e-07,2.92361e-08,0.856522,0.000526745,-1.90016e-07,-7.96341e-09,0.857049,0.000526341,-2.13907e-07,2.61752e-09,0.857575,0.000525922,-2.06054e-07,-2.50665e-09,0.8581,0.000525502,-2.13574e-07,7.40906e-09,0.858626,0.000525097,-1.91347e-07,-2.71296e-08,0.859151,0.000524633,-2.72736e-07,4.15048e-08,0.859675,0.000524212,-1.48221e-07,-1.96802e-08,0.860199,0.000523856,-2.07262e-07,-2.23886e-08,0.860723,0.000523375,-2.74428e-07,4.96299e-08,0.861246,0.000522975,-1.25538e-07,-5.69216e-08,0.861769,0.000522553,-2.96303e-07,5.88473e-08,0.862291,0.000522137,-1.19761e-07,-5.92584e-08,0.862813,0.00052172,-2.97536e-07,5.8977e-08,0.863334,0.000521301,-1.20605e-07,-5.74403e-08,0.863855,0.000520888,-2.92926e-07,5.15751e-08,0.864376,0.000520457,-1.38201e-07,-2.96506e-08,0.864896,0.000520091,-2.27153e-07,7.42277e-09,0.865416,0.000519659,-2.04885e-07,-4.05057e-11,0.865936,0.00051925,-2.05006e-07,-7.26074e-09,0.866455,0.000518818,-2.26788e-07,2.90835e-08,0.866973,0.000518451,-1.39538e-07,-4.94686e-08,0.867492,0.000518024,-2.87944e-07,4.95814e-08,0.868009,0.000517597,-1.39199e-07,-2.96479e-08,0.868527,0.000517229,-2.28143e-07,9.40539e-09,0.869044,0.000516801,-1.99927e-07,-7.9737e-09,0.86956,0.000516378,-2.23848e-07,2.24894e-08,0.870077,0.000515997,-1.5638e-07,-2.23793e-08,0.870592,0.000515617,-2.23517e-07,7.42302e-09,0.871108,0.000515193,-2.01248e-07,-7.31283e-09,0.871623,0.000514768,-2.23187e-07,2.18283e-08,0.872137,0.000514387,-1.57702e-07,-2.03959e-08,0.872652,0.000514011,-2.1889e-07,1.50711e-10,0.873165,0.000513573,-2.18437e-07,1.97931e-08,0.873679,0.000513196,-1.59058e-07,-1.97183e-08,0.874192,0.000512819,-2.18213e-07,-5.24324e-10,0.874704,0.000512381,-2.19786e-07,2.18156e-08,0.875217,0.000512007,-1.54339e-07,-2.71336e-08,0.875728,0.000511616,-2.3574e-07,2.71141e-08,0.87624,0.000511226,-1.54398e-07,-2.17182e-08,0.876751,0.000510852,-2.19552e-07,1.54131e-10,0.877262,0.000510414,-2.1909e-07,2.11017e-08,0.877772,0.000510039,-1.55785e-07,-2.49562e-08,0.878282,0.000509652,-2.30654e-07,1.91183e-08,0.878791,0.000509248,-1.73299e-07,8.08751e-09,0.8793,0.000508926,-1.49036e-07,-5.14684e-08,0.879809,0.000508474,-3.03441e-07,7.85766e-08,0.880317,0.000508103,-6.77112e-08,-8.40242e-08,0.880825,0.000507715,-3.19784e-07,7.87063e-08,0.881333,0.000507312,-8.36649e-08,-5.19871e-08,0.88184,0.000506988,-2.39626e-07,1.00327e-08,0.882346,0.000506539,-2.09528e-07,1.18562e-08,0.882853,0.000506156,-1.73959e-07,2.14703e-09,0.883359,0.000505814,-1.67518e-07,-2.04444e-08,0.883864,0.000505418,-2.28851e-07,2.00258e-08,0.88437,0.00050502,-1.68774e-07,-5.42855e-11,0.884874,0.000504682,-1.68937e-07,-1.98087e-08,0.885379,0.000504285,-2.28363e-07,1.96842e-08,0.885883,0.000503887,-1.6931e-07,6.76342e-10,0.886387,0.000503551,-1.67281e-07,-2.23896e-08,0.88689,0.000503149,-2.3445e-07,2.92774e-08,0.887393,0.000502768,-1.46618e-07,-3.51152e-08,0.887896,0.00050237,-2.51963e-07,5.15787e-08,0.888398,0.00050202,-9.72271e-08,-5.19903e-08,0.8889,0.00050167,-2.53198e-07,3.71732e-08,0.889401,0.000501275,-1.41678e-07,-3.70978e-08,0.889902,0.00050088,-2.52972e-07,5.16132e-08,0.890403,0.000500529,-9.81321e-08,-5.01459e-08,0.890903,0.000500183,-2.4857e-07,2.9761e-08,0.891403,0.000499775,-1.59287e-07,-9.29351e-09,0.891903,0.000499428,-1.87167e-07,7.41301e-09,0.892402,0.000499076,-1.64928e-07,-2.03585e-08,0.892901,0.000498685,-2.26004e-07,1.44165e-08,0.893399,0.000498276,-1.82754e-07,2.22974e-08,0.893898,0.000497978,-1.15862e-07,-4.40013e-08,0.894395,0.000497614,-2.47866e-07,3.44985e-08,0.894893,0.000497222,-1.44371e-07,-3.43882e-08,0.89539,0.00049683,-2.47535e-07,4.34497e-08,0.895886,0.000496465,-1.17186e-07,-2.02012e-08,0.896383,0.00049617,-1.7779e-07,-2.22497e-08,0.896879,0.000495748,-2.44539e-07,4.95952e-08,0.897374,0.000495408,-9.57532e-08,-5.69217e-08,0.89787,0.000495045,-2.66518e-07,5.88823e-08,0.898364,0.000494689,-8.98713e-08,-5.93983e-08,0.898859,0.000494331,-2.68066e-07,5.95017e-08,0.899353,0.000493973,-8.95613e-08,-5.9399e-08,0.899847,0.000493616,-2.67758e-07,5.8885e-08,0.90034,0.000493257,-9.11033e-08,-5.69317e-08,0.900833,0.000492904,-2.61898e-07,4.96326e-08,0.901326,0.000492529,-1.13001e-07,-2.23893e-08,0.901819,0.000492236,-1.80169e-07,-1.968e-08,0.902311,0.000491817,-2.39209e-07,4.15047e-08,0.902802,0.000491463,-1.14694e-07,-2.71296e-08,0.903293,0.000491152,-1.96083e-07,7.409e-09,0.903784,0.000490782,-1.73856e-07,-2.50645e-09,0.904275,0.000490427,-1.81376e-07,2.61679e-09,0.904765,0.000490072,-1.73525e-07,-7.96072e-09,0.905255,0.000489701,-1.97407e-07,2.92261e-08,0.905745,0.000489394,-1.09729e-07,-4.93389e-08,0.906234,0.000489027,-2.57746e-07,4.89204e-08,0.906723,0.000488658,-1.10985e-07,-2.71333e-08,0.907211,0.000488354,-1.92385e-07,8.30861e-12,0.907699,0.00048797,-1.9236e-07,2.71001e-08,0.908187,0.000487666,-1.1106e-07,-4.88041e-08,0.908675,0.000487298,-2.57472e-07,4.89069e-08,0.909162,0.000486929,-1.10751e-07,-2.76143e-08,0.909649,0.000486625,-1.93594e-07,1.9457e-09,0.910135,0.000486244,-1.87757e-07,1.98315e-08,0.910621,0.000485928,-1.28262e-07,-2.16671e-08,0.911107,0.000485606,-1.93264e-07,7.23216e-09,0.911592,0.000485241,-1.71567e-07,-7.26152e-09,0.912077,0.000484877,-1.93352e-07,2.18139e-08,0.912562,0.000484555,-1.2791e-07,-2.03895e-08,0.913047,0.000484238,-1.89078e-07,1.39494e-10,0.913531,0.000483861,-1.8866e-07,1.98315e-08,0.914014,0.000483543,-1.29165e-07,-1.98609e-08,0.914498,0.000483225,-1.88748e-07,7.39912e-12,0.914981,0.000482847,-1.88726e-07,1.98313e-08,0.915463,0.000482529,-1.29232e-07,-1.9728e-08,0.915946,0.000482212,-1.88416e-07,-5.24035e-10,0.916428,0.000481833,-1.89988e-07,2.18241e-08,0.916909,0.000481519,-1.24516e-07,-2.71679e-08,0.917391,0.000481188,-2.06019e-07,2.72427e-08,0.917872,0.000480858,-1.24291e-07,-2.21985e-08,0.918353,0.000480543,-1.90886e-07,1.94644e-09,0.918833,0.000480167,-1.85047e-07,1.44127e-08,0.919313,0.00047984,-1.41809e-07,7.39438e-12,0.919793,0.000479556,-1.41787e-07,-1.44423e-08,0.920272,0.000479229,-1.85114e-07,-1.84291e-09,0.920751,0.000478854,-1.90642e-07,2.18139e-08,0.92123,0.000478538,-1.25201e-07,-2.58081e-08,0.921708,0.00047821,-2.02625e-07,2.18139e-08,0.922186,0.00047787,-1.37183e-07,-1.84291e-09,0.922664,0.00047759,-1.42712e-07,-1.44423e-08,0.923141,0.000477262,-1.86039e-07,7.34701e-12,0.923618,0.00047689,-1.86017e-07,1.44129e-08,0.924095,0.000476561,-1.42778e-07,1.94572e-09,0.924572,0.000476281,-1.36941e-07,-2.21958e-08,0.925048,0.000475941,-2.03528e-07,2.72327e-08,0.925523,0.000475615,-1.2183e-07,-2.71304e-08,0.925999,0.00047529,-2.03221e-07,2.16843e-08,0.926474,0.000474949,-1.38168e-07,-2.16005e-12,0.926949,0.000474672,-1.38175e-07,-2.16756e-08,0.927423,0.000474331,-2.03202e-07,2.71001e-08,0.927897,0.000474006,-1.21902e-07,-2.71201e-08,0.928371,0.000473681,-2.03262e-07,2.17757e-08,0.928845,0.00047334,-1.37935e-07,-3.78028e-10,0.929318,0.000473063,-1.39069e-07,-2.02636e-08,0.929791,0.000472724,-1.9986e-07,2.18276e-08,0.930263,0.000472389,-1.34377e-07,-7.44231e-09,0.930736,0.000472098,-1.56704e-07,7.94165e-09,0.931208,0.000471809,-1.32879e-07,-2.43243e-08,0.931679,0.00047147,-2.05851e-07,2.97508e-08,0.932151,0.000471148,-1.16599e-07,-3.50742e-08,0.932622,0.000470809,-2.21822e-07,5.09414e-08,0.933092,0.000470518,-6.89976e-08,-4.94821e-08,0.933563,0.000470232,-2.17444e-07,2.77775e-08,0.934033,0.00046988,-1.34111e-07,-2.02351e-09,0.934502,0.000469606,-1.40182e-07,-1.96835e-08,0.934972,0.000469267,-1.99232e-07,2.11529e-08,0.935441,0.000468932,-1.35774e-07,-5.32332e-09,0.93591,0.000468644,-1.51743e-07,1.40413e-10,0.936378,0.000468341,-1.51322e-07,4.76166e-09,0.936846,0.000468053,-1.37037e-07,-1.9187e-08,0.937314,0.000467721,-1.94598e-07,1.23819e-08,0.937782,0.000467369,-1.57453e-07,2.92642e-08,0.938249,0.000467142,-6.96601e-08,-6.98342e-08,0.938716,0.000466793,-2.79163e-07,7.12586e-08,0.939183,0.000466449,-6.53869e-08,-3.63863e-08,0.939649,0.000466209,-1.74546e-07,1.46818e-08,0.940115,0.000465904,-1.305e-07,-2.2341e-08,0.940581,0.000465576,-1.97523e-07,1.50774e-08,0.941046,0.000465226,-1.52291e-07,2.16359e-08,0.941511,0.000464986,-8.73832e-08,-4.20162e-08,0.941976,0.000464685,-2.13432e-07,2.72198e-08,0.942441,0.00046434,-1.31773e-07,-7.2581e-09,0.942905,0.000464055,-1.53547e-07,1.81263e-09,0.943369,0.000463753,-1.48109e-07,7.58386e-12,0.943832,0.000463457,-1.48086e-07,-1.84298e-09,0.944296,0.000463155,-1.53615e-07,7.36433e-09,0.944759,0.00046287,-1.31522e-07,-2.76143e-08,0.945221,0.000462524,-2.14365e-07,4.34883e-08,0.945684,0.000462226,-8.39003e-08,-2.71297e-08,0.946146,0.000461977,-1.65289e-07,5.42595e-09,0.946608,0.000461662,-1.49012e-07,5.42593e-09,0.947069,0.000461381,-1.32734e-07,-2.71297e-08,0.94753,0.000461034,-2.14123e-07,4.34881e-08,0.947991,0.000460736,-8.36585e-08,-2.76134e-08,0.948452,0.000460486,-1.66499e-07,7.36083e-09,0.948912,0.000460175,-1.44416e-07,-1.82993e-09,0.949372,0.000459881,-1.49906e-07,-4.11073e-11,0.949832,0.000459581,-1.50029e-07,1.99434e-09,0.950291,0.000459287,-1.44046e-07,-7.93627e-09,0.950751,0.000458975,-1.67855e-07,2.97507e-08,0.951209,0.000458728,-7.86029e-08,-5.1462e-08,0.951668,0.000458417,-2.32989e-07,5.6888e-08,0.952126,0.000458121,-6.2325e-08,-5.68806e-08,0.952584,0.000457826,-2.32967e-07,5.14251e-08,0.953042,0.000457514,-7.86914e-08,-2.96107e-08,0.953499,0.000457268,-1.67523e-07,7.41296e-09,0.953956,0.000456955,-1.45285e-07,-4.11262e-11,0.954413,0.000456665,-1.45408e-07,-7.24847e-09,0.95487,0.000456352,-1.67153e-07,2.9035e-08,0.955326,0.000456105,-8.00484e-08,-4.92869e-08,0.955782,0.000455797,-2.27909e-07,4.89032e-08,0.956238,0.000455488,-8.11994e-08,-2.71166e-08,0.956693,0.000455244,-1.62549e-07,-4.13678e-11,0.957148,0.000454919,-1.62673e-07,2.72821e-08,0.957603,0.000454675,-8.0827e-08,-4.94824e-08,0.958057,0.000454365,-2.29274e-07,5.14382e-08,0.958512,0.000454061,-7.49597e-08,-3.7061e-08,0.958965,0.0004538,-1.86143e-07,3.72013e-08,0.959419,0.000453539,-7.45389e-08,-5.21396e-08,0.959873,0.000453234,-2.30958e-07,5.21476e-08,0.960326,0.000452928,-7.45146e-08,-3.72416e-08,0.960778,0.000452667,-1.8624e-07,3.72143e-08,0.961231,0.000452407,-7.45967e-08,-5.20109e-08,0.961683,0.000452101,-2.30629e-07,5.16199e-08,0.962135,0.000451795,-7.57696e-08,-3.52595e-08,0.962587,0.000451538,-1.81548e-07,2.98133e-08,0.963038,0.000451264,-9.2108e-08,-2.43892e-08,0.963489,0.000451007,-1.65276e-07,8.13892e-09,0.96394,0.000450701,-1.40859e-07,-8.16647e-09,0.964391,0.000450394,-1.65358e-07,2.45269e-08,0.964841,0.000450137,-9.17775e-08,-3.03367e-08,0.965291,0.000449863,-1.82787e-07,3.7215e-08,0.965741,0.000449609,-7.11424e-08,-5.89188e-08,0.96619,0.00044929,-2.47899e-07,7.92509e-08,0.966639,0.000449032,-1.01462e-08,-7.92707e-08,0.967088,0.000448773,-2.47958e-07,5.90181e-08,0.967537,0.000448455,-7.0904e-08,-3.75925e-08,0.967985,0.0004482,-1.83681e-07,3.17471e-08,0.968433,0.000447928,-8.84401e-08,-2.97913e-08,0.968881,0.000447662,-1.77814e-07,2.78133e-08,0.969329,0.000447389,-9.4374e-08,-2.18572e-08,0.969776,0.000447135,-1.59946e-07,1.10134e-11,0.970223,0.000446815,-1.59913e-07,2.18132e-08,0.97067,0.000446561,-9.44732e-08,-2.76591e-08,0.971116,0.000446289,-1.7745e-07,2.92185e-08,0.971562,0.000446022,-8.97948e-08,-2.96104e-08,0.972008,0.000445753,-1.78626e-07,2.96185e-08,0.972454,0.000445485,-8.97706e-08,-2.92588e-08,0.972899,0.000445218,-1.77547e-07,2.78123e-08,0.973344,0.000444946,-9.41103e-08,-2.23856e-08,0.973789,0.000444691,-1.61267e-07,2.12559e-09,0.974233,0.000444374,-1.5489e-07,1.38833e-08,0.974678,0.000444106,-1.13241e-07,1.94591e-09,0.975122,0.000443886,-1.07403e-07,-2.16669e-08,0.975565,0.000443606,-1.72404e-07,2.5117e-08,0.976009,0.000443336,-9.70526e-08,-1.91963e-08,0.976452,0.000443085,-1.54642e-07,-7.93627e-09,0.976895,0.000442752,-1.7845e-07,5.09414e-08,0.977338,0.000442548,-2.56262e-08,-7.66201e-08,0.97778,0.000442266,-2.55486e-07,7.67249e-08,0.978222,0.000441986,-2.53118e-08,-5.14655e-08,0.978664,0.000441781,-1.79708e-07,9.92773e-09,0.979106,0.000441451,-1.49925e-07,1.17546e-08,0.979547,0.000441186,-1.14661e-07,2.65868e-09,0.979988,0.000440965,-1.06685e-07,-2.23893e-08,0.980429,0.000440684,-1.73853e-07,2.72939e-08,0.980869,0.000440419,-9.19716e-08,-2.71816e-08,0.98131,0.000440153,-1.73516e-07,2.18278e-08,0.98175,0.000439872,-1.08033e-07,-5.24833e-10,0.982189,0.000439654,-1.09607e-07,-1.97284e-08,0.982629,0.000439376,-1.68793e-07,1.98339e-08,0.983068,0.000439097,-1.09291e-07,-2.62901e-12,0.983507,0.000438879,-1.09299e-07,-1.98234e-08,0.983946,0.000438601,-1.68769e-07,1.96916e-08,0.984384,0.000438322,-1.09694e-07,6.6157e-10,0.984823,0.000438105,-1.0771e-07,-2.23379e-08,0.985261,0.000437823,-1.74723e-07,2.90855e-08,0.985698,0.00043756,-8.74669e-08,-3.43992e-08,0.986136,0.000437282,-1.90665e-07,4.89068e-08,0.986573,0.000437048,-4.39442e-08,-4.20188e-08,0.98701,0.000436834,-1.7e-07,-4.11073e-11,0.987446,0.000436494,-1.70124e-07,4.21832e-08,0.987883,0.00043628,-4.35742e-08,-4.94824e-08,0.988319,0.000436044,-1.92021e-07,3.6537e-08,0.988755,0.00043577,-8.24102e-08,-3.70611e-08,0.989191,0.000435494,-1.93593e-07,5.21026e-08,0.989626,0.000435263,-3.72855e-08,-5.21402e-08,0.990061,0.000435032,-1.93706e-07,3.7249e-08,0.990496,0.000434756,-8.19592e-08,-3.72512e-08,0.990931,0.000434481,-1.93713e-07,5.21511e-08,0.991365,0.00043425,-3.72595e-08,-5.21439e-08,0.991799,0.000434019,-1.93691e-07,3.72152e-08,0.992233,0.000433743,-8.20456e-08,-3.71123e-08,0.992667,0.000433468,-1.93382e-07,5.16292e-08,0.9931,0.000433236,-3.84947e-08,-5.01953e-08,0.993533,0.000433008,-1.89081e-07,2.99427e-08,0.993966,0.00043272,-9.92525e-08,-9.9708e-09,0.994399,0.000432491,-1.29165e-07,9.94051e-09,0.994831,0.000432263,-9.93434e-08,-2.97912e-08,0.995263,0.000431975,-1.88717e-07,4.96198e-08,0.995695,0.000431746,-3.98578e-08,-4.94785e-08,0.996127,0.000431518,-1.88293e-07,2.9085e-08,0.996558,0.000431229,-1.01038e-07,-7.25675e-09,0.996989,0.000431005,-1.22809e-07,-5.79945e-11,0.99742,0.000430759,-1.22983e-07,7.48873e-09,0.997851,0.000430536,-1.00516e-07,-2.98969e-08,0.998281,0.000430245,-1.90207e-07,5.24942e-08,0.998711,0.000430022,-3.27246e-08,-6.08706e-08,0.999141,0.000429774,-2.15336e-07,7.17788e-08,0.999571,0.000429392,0.,0.}; + + template + __device__ __forceinline__ void Lab2RGBConvert_f(const T& src, D& dst) + { + const float lThresh = 0.008856f * 903.3f; + const float fThresh = 7.787f * 0.008856f + 16.0f / 116.0f; + + float Y, fy; + + if (src.x <= lThresh) + { + Y = src.x / 903.3f; + fy = 7.787f * Y + 16.0f / 116.0f; + } + else + { + fy = (src.x + 16.0f) / 116.0f; + Y = fy * fy * fy; + } + + float X = src.y / 500.0f + fy; + float Z = fy - src.z / 200.0f; + + if (X <= fThresh) + X = (X - 16.0f / 116.0f) / 7.787f; + else + X = X * X * X; + + if (Z <= fThresh) + Z = (Z - 16.0f / 116.0f) / 7.787f; + else + Z = Z * Z * Z; + + float B = 0.052891f * X - 0.204043f * Y + 1.151152f * Z; + float G = -0.921235f * X + 1.875991f * Y + 0.045244f * Z; + float R = 3.079933f * X - 1.537150f * Y - 0.542782f * Z; + + if (srgb) + { + B = splineInterpolate(B * GAMMA_TAB_SIZE, c_sRGBInvGammaTab, GAMMA_TAB_SIZE); + G = splineInterpolate(G * GAMMA_TAB_SIZE, c_sRGBInvGammaTab, GAMMA_TAB_SIZE); + R = splineInterpolate(R * GAMMA_TAB_SIZE, c_sRGBInvGammaTab, GAMMA_TAB_SIZE); + } + + dst.x = blueIdx == 0 ? B : R; + dst.y = G; + dst.z = blueIdx == 0 ? R : B; + setAlpha(dst, ColorChannel::max()); + } + + template + __device__ __forceinline__ void Lab2RGBConvert_b(const T& src, D& dst) + { + float3 srcf, dstf; + + srcf.x = src.x * (100.f / 255.f); + srcf.y = src.y - 128; + srcf.z = src.z - 128; + + Lab2RGBConvert_f(srcf, dstf); + + dst.x = saturate_cast(dstf.x * 255.f); + dst.y = saturate_cast(dstf.y * 255.f); + dst.z = saturate_cast(dstf.z * 255.f); + setAlpha(dst, ColorChannel::max()); + } + + template struct Lab2RGB; + template + struct Lab2RGB + : unary_function::vec_type, typename TypeVec::vec_type> + { + __device__ __forceinline__ typename TypeVec::vec_type operator ()(const typename TypeVec::vec_type& src) const + { + typename TypeVec::vec_type dst; + + Lab2RGBConvert_b(src, dst); + + return dst; + } + __host__ __device__ __forceinline__ Lab2RGB() {} + __host__ __device__ __forceinline__ Lab2RGB(const Lab2RGB&) {} + }; + template + struct Lab2RGB + : unary_function::vec_type, typename TypeVec::vec_type> + { + __device__ __forceinline__ typename TypeVec::vec_type operator ()(const typename TypeVec::vec_type& src) const + { + typename TypeVec::vec_type dst; + + Lab2RGBConvert_f(src, dst); + + return dst; + } + __host__ __device__ __forceinline__ Lab2RGB() {} + __host__ __device__ __forceinline__ Lab2RGB(const Lab2RGB&) {} + }; + } + +#define OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(name, scn, dcn, srgb, blueIdx) \ + template struct name ## _traits \ + { \ + typedef ::cv::cuda::device::color_detail::Lab2RGB functor_type; \ + static __host__ __device__ __forceinline__ functor_type create_functor() \ + { \ + return functor_type(); \ + } \ + }; + +///////////////////////////////////// RGB <-> Luv ///////////////////////////////////// + + namespace color_detail + { + __constant__ float c_LabCbrtTab[] = {0.137931,0.0114066,0.,1.18859e-07,0.149338,0.011407,3.56578e-07,-5.79396e-07,0.160745,0.0114059,-1.38161e-06,2.16892e-06,0.172151,0.0114097,5.12516e-06,-8.0814e-06,0.183558,0.0113957,-1.9119e-05,3.01567e-05,0.194965,0.0114479,7.13509e-05,-0.000112545,0.206371,0.011253,-0.000266285,-0.000106493,0.217252,0.0104009,-0.000585765,7.32149e-05,0.22714,0.00944906,-0.00036612,1.21917e-05,0.236235,0.0087534,-0.000329545,2.01753e-05,0.244679,0.00815483,-0.000269019,1.24435e-05,0.252577,0.00765412,-0.000231689,1.05618e-05,0.26001,0.00722243,-0.000200003,8.26662e-06,0.267041,0.00684723,-0.000175203,6.76746e-06,0.27372,0.00651712,-0.000154901,5.61192e-06,0.280088,0.00622416,-0.000138065,4.67009e-06,0.286179,0.00596204,-0.000124055,3.99012e-06,0.292021,0.0057259,-0.000112085,3.36032e-06,0.297638,0.00551181,-0.000102004,2.95338e-06,0.30305,0.00531666,-9.31435e-05,2.52875e-06,0.308277,0.00513796,-8.55572e-05,2.22022e-06,0.313331,0.00497351,-7.88966e-05,1.97163e-06,0.318228,0.00482163,-7.29817e-05,1.7248e-06,0.322978,0.00468084,-6.78073e-05,1.55998e-06,0.327593,0.0045499,-6.31274e-05,1.36343e-06,0.332081,0.00442774,-5.90371e-05,1.27136e-06,0.336451,0.00431348,-5.5223e-05,1.09111e-06,0.34071,0.00420631,-5.19496e-05,1.0399e-06,0.344866,0.00410553,-4.88299e-05,9.18347e-07,0.348923,0.00401062,-4.60749e-05,8.29942e-07,0.352889,0.00392096,-4.35851e-05,7.98478e-07,0.356767,0.00383619,-4.11896e-05,6.84917e-07,0.360562,0.00375586,-3.91349e-05,6.63976e-07,0.36428,0.00367959,-3.7143e-05,5.93086e-07,0.367923,0.00360708,-3.53637e-05,5.6976e-07,0.371495,0.00353806,-3.36544e-05,4.95533e-07,0.375,0.00347224,-3.21678e-05,4.87951e-07,0.378441,0.00340937,-3.0704e-05,4.4349e-07,0.38182,0.00334929,-2.93735e-05,4.20297e-07,0.38514,0.0032918,-2.81126e-05,3.7872e-07,0.388404,0.00323671,-2.69764e-05,3.596e-07,0.391614,0.00318384,-2.58976e-05,3.5845e-07,0.394772,0.00313312,-2.48223e-05,2.92765e-07,0.397881,0.00308435,-2.3944e-05,3.18232e-07,0.400942,0.00303742,-2.29893e-05,2.82046e-07,0.403957,0.00299229,-2.21432e-05,2.52315e-07,0.406927,0.00294876,-2.13862e-05,2.58416e-07,0.409855,0.00290676,-2.0611e-05,2.33939e-07,0.412741,0.00286624,-1.99092e-05,2.36342e-07,0.415587,0.00282713,-1.92001e-05,1.916e-07,0.418396,0.00278931,-1.86253e-05,2.1915e-07,0.421167,0.00275271,-1.79679e-05,1.83498e-07,0.423901,0.00271733,-1.74174e-05,1.79343e-07,0.426602,0.00268303,-1.68794e-05,1.72013e-07,0.429268,0.00264979,-1.63633e-05,1.75686e-07,0.431901,0.00261759,-1.58363e-05,1.3852e-07,0.434503,0.00258633,-1.54207e-05,1.64304e-07,0.437074,0.00255598,-1.49278e-05,1.28136e-07,0.439616,0.00252651,-1.45434e-05,1.57618e-07,0.442128,0.0024979,-1.40705e-05,1.0566e-07,0.444612,0.00247007,-1.37535e-05,1.34998e-07,0.447068,0.00244297,-1.33485e-05,1.29207e-07,0.449498,0.00241666,-1.29609e-05,9.32347e-08,0.451902,0.00239102,-1.26812e-05,1.23703e-07,0.45428,0.00236603,-1.23101e-05,9.74072e-08,0.456634,0.0023417,-1.20179e-05,1.12518e-07,0.458964,0.002318,-1.16803e-05,7.83681e-08,0.46127,0.00229488,-1.14452e-05,1.10452e-07,0.463554,0.00227232,-1.11139e-05,7.58719e-08,0.465815,0.00225032,-1.08863e-05,9.2699e-08,0.468055,0.00222882,-1.06082e-05,8.97738e-08,0.470273,0.00220788,-1.03388e-05,5.4845e-08,0.47247,0.00218736,-1.01743e-05,1.0808e-07,0.474648,0.00216734,-9.85007e-06,4.9277e-08,0.476805,0.00214779,-9.70224e-06,8.22408e-08,0.478943,0.00212863,-9.45551e-06,6.87942e-08,0.481063,0.00210993,-9.24913e-06,5.98144e-08,0.483163,0.00209161,-9.06969e-06,7.93789e-08,0.485246,0.00207371,-8.83155e-06,3.99032e-08,0.487311,0.00205616,-8.71184e-06,8.88325e-08,0.489358,0.002039,-8.44534e-06,2.20004e-08,0.491389,0.00202218,-8.37934e-06,9.13872e-08,0.493403,0.0020057,-8.10518e-06,2.96829e-08,0.495401,0.00198957,-8.01613e-06,5.81028e-08,0.497382,0.00197372,-7.84183e-06,6.5731e-08,0.499348,0.00195823,-7.64463e-06,3.66019e-08,0.501299,0.00194305,-7.53483e-06,2.62811e-08,0.503234,0.00192806,-7.45598e-06,9.66907e-08,0.505155,0.00191344,-7.16591e-06,4.18928e-09,0.507061,0.00189912,-7.15334e-06,6.53665e-08,0.508953,0.00188501,-6.95724e-06,3.23686e-08,0.510831,0.00187119,-6.86014e-06,4.35774e-08,0.512696,0.0018576,-6.72941e-06,3.17406e-08,0.514547,0.00184424,-6.63418e-06,6.78785e-08,0.516384,0.00183117,-6.43055e-06,-5.23126e-09,0.518209,0.0018183,-6.44624e-06,7.22562e-08,0.520021,0.00180562,-6.22947e-06,1.42292e-08,0.52182,0.0017932,-6.18679e-06,4.9641e-08,0.523607,0.00178098,-6.03786e-06,2.56259e-08,0.525382,0.00176898,-5.96099e-06,2.66696e-08,0.527145,0.00175714,-5.88098e-06,4.65094e-08,0.528897,0.00174552,-5.74145e-06,2.57114e-08,0.530637,0.00173411,-5.66431e-06,2.94588e-08,0.532365,0.00172287,-5.57594e-06,3.52667e-08,0.534082,0.00171182,-5.47014e-06,8.28868e-09,0.535789,0.00170091,-5.44527e-06,5.07871e-08,0.537484,0.00169017,-5.29291e-06,2.69817e-08,0.539169,0.00167967,-5.21197e-06,2.01009e-08,0.540844,0.0016693,-5.15166e-06,1.18237e-08,0.542508,0.00165903,-5.11619e-06,5.18135e-08,0.544162,0.00164896,-4.96075e-06,1.9341e-08,0.545806,0.00163909,-4.90273e-06,-9.96867e-09,0.54744,0.00162926,-4.93263e-06,8.01382e-08,0.549064,0.00161963,-4.69222e-06,-1.25601e-08,0.550679,0.00161021,-4.7299e-06,2.97067e-08,0.552285,0.00160084,-4.64078e-06,1.29426e-08,0.553881,0.0015916,-4.60195e-06,3.77327e-08,0.555468,0.00158251,-4.48875e-06,1.49412e-08,0.557046,0.00157357,-4.44393e-06,2.17118e-08,0.558615,0.00156475,-4.3788e-06,1.74206e-08,0.560176,0.00155605,-4.32653e-06,2.78152e-08,0.561727,0.00154748,-4.24309e-06,-9.47239e-09,0.563271,0.00153896,-4.27151e-06,6.9679e-08,0.564805,0.00153063,-4.06247e-06,-3.08246e-08,0.566332,0.00152241,-4.15494e-06,5.36188e-08,0.56785,0.00151426,-3.99409e-06,-4.83594e-09,0.56936,0.00150626,-4.00859e-06,2.53293e-08,0.570863,0.00149832,-3.93261e-06,2.27286e-08,0.572357,0.00149052,-3.86442e-06,2.96541e-09,0.573844,0.0014828,-3.85552e-06,2.50147e-08,0.575323,0.00147516,-3.78048e-06,1.61842e-08,0.576794,0.00146765,-3.73193e-06,2.94582e-08,0.578258,0.00146028,-3.64355e-06,-1.48076e-08,0.579715,0.00145295,-3.68798e-06,2.97724e-08,0.581164,0.00144566,-3.59866e-06,1.49272e-08,0.582606,0.00143851,-3.55388e-06,2.97285e-08,0.584041,0.00143149,-3.46469e-06,-1.46323e-08,0.585469,0.00142451,-3.50859e-06,2.88004e-08,0.58689,0.00141758,-3.42219e-06,1.864e-08,0.588304,0.00141079,-3.36627e-06,1.58482e-08,0.589712,0.00140411,-3.31872e-06,-2.24279e-08,0.591112,0.00139741,-3.38601e-06,7.38639e-08,0.592507,0.00139085,-3.16441e-06,-3.46088e-08,0.593894,0.00138442,-3.26824e-06,4.96675e-09,0.595275,0.0013779,-3.25334e-06,7.4346e-08,0.59665,0.00137162,-3.0303e-06,-6.39319e-08,0.598019,0.00136536,-3.2221e-06,6.21725e-08,0.599381,0.00135911,-3.03558e-06,-5.94423e-09,0.600737,0.00135302,-3.05341e-06,2.12091e-08,0.602087,0.00134697,-2.98979e-06,-1.92876e-08,0.603431,0.00134094,-3.04765e-06,5.5941e-08,0.604769,0.00133501,-2.87983e-06,-2.56622e-08,0.606101,0.00132917,-2.95681e-06,4.67078e-08,0.607427,0.0013234,-2.81669e-06,-4.19592e-08,0.608748,0.00131764,-2.94257e-06,6.15243e-08,0.610062,0.00131194,-2.75799e-06,-2.53244e-08,0.611372,0.00130635,-2.83397e-06,3.97739e-08,0.612675,0.0013008,-2.71465e-06,-1.45618e-08,0.613973,0.00129533,-2.75833e-06,1.84733e-08,0.615266,0.00128986,-2.70291e-06,2.73606e-10,0.616553,0.00128446,-2.70209e-06,4.00367e-08,0.617835,0.00127918,-2.58198e-06,-4.12113e-08,0.619111,0.00127389,-2.70561e-06,6.52039e-08,0.620383,0.00126867,-2.51e-06,-4.07901e-08,0.621649,0.00126353,-2.63237e-06,3.83516e-08,0.62291,0.00125838,-2.51732e-06,6.59315e-09,0.624166,0.00125337,-2.49754e-06,-5.11939e-09,0.625416,0.00124836,-2.5129e-06,1.38846e-08,0.626662,0.00124337,-2.47124e-06,9.18514e-09,0.627903,0.00123846,-2.44369e-06,8.97952e-09,0.629139,0.0012336,-2.41675e-06,1.45012e-08,0.63037,0.00122881,-2.37325e-06,-7.37949e-09,0.631597,0.00122404,-2.39538e-06,1.50169e-08,0.632818,0.00121929,-2.35033e-06,6.91648e-09,0.634035,0.00121461,-2.32958e-06,1.69219e-08,0.635248,0.00121,-2.27882e-06,-1.49997e-08,0.636455,0.0012054,-2.32382e-06,4.30769e-08,0.637659,0.00120088,-2.19459e-06,-3.80986e-08,0.638857,0.00119638,-2.30888e-06,4.97134e-08,0.640051,0.00119191,-2.15974e-06,-4.15463e-08,0.641241,0.00118747,-2.28438e-06,5.68667e-08,0.642426,0.00118307,-2.11378e-06,-7.10641e-09,0.643607,0.00117882,-2.1351e-06,-2.8441e-08,0.644784,0.00117446,-2.22042e-06,6.12658e-08,0.645956,0.00117021,-2.03663e-06,-3.78083e-08,0.647124,0.00116602,-2.15005e-06,3.03627e-08,0.648288,0.00116181,-2.05896e-06,-2.40379e-08,0.649448,0.00115762,-2.13108e-06,6.57887e-08,0.650603,0.00115356,-1.93371e-06,-6.03028e-08,0.651755,0.00114951,-2.11462e-06,5.62134e-08,0.652902,0.00114545,-1.94598e-06,-4.53417e-08,0.654046,0.00114142,-2.082e-06,6.55489e-08,0.655185,0.00113745,-1.88536e-06,-3.80396e-08,0.656321,0.00113357,-1.99948e-06,2.70049e-08,0.657452,0.00112965,-1.91846e-06,-1.03755e-08,0.65858,0.00112578,-1.94959e-06,1.44973e-08,0.659704,0.00112192,-1.9061e-06,1.1991e-08,0.660824,0.00111815,-1.87012e-06,-2.85634e-09,0.66194,0.0011144,-1.87869e-06,-5.65782e-10,0.663053,0.00111064,-1.88039e-06,5.11947e-09,0.664162,0.0011069,-1.86503e-06,3.96924e-08,0.665267,0.00110328,-1.74595e-06,-4.46795e-08,0.666368,0.00109966,-1.87999e-06,1.98161e-08,0.667466,0.00109596,-1.82054e-06,2.502e-08,0.66856,0.00109239,-1.74548e-06,-6.86593e-10,0.669651,0.0010889,-1.74754e-06,-2.22739e-08,0.670738,0.00108534,-1.81437e-06,3.01776e-08,0.671821,0.0010818,-1.72383e-06,2.07732e-08,0.672902,0.00107841,-1.66151e-06,-5.36658e-08,0.673978,0.00107493,-1.82251e-06,7.46802e-08,0.675051,0.00107151,-1.59847e-06,-6.62411e-08,0.676121,0.00106811,-1.79719e-06,7.10748e-08,0.677188,0.00106473,-1.58397e-06,-3.92441e-08,0.678251,0.00106145,-1.7017e-06,2.62973e-08,0.679311,0.00105812,-1.62281e-06,-6.34035e-09,0.680367,0.00105486,-1.64183e-06,-9.36249e-10,0.68142,0.00105157,-1.64464e-06,1.00854e-08,0.68247,0.00104831,-1.61438e-06,2.01995e-08,0.683517,0.00104514,-1.55378e-06,-3.1279e-08,0.68456,0.00104194,-1.64762e-06,4.53114e-08,0.685601,0.00103878,-1.51169e-06,-3.07573e-08,0.686638,0.00103567,-1.60396e-06,1.81133e-08,0.687672,0.00103251,-1.54962e-06,1.79085e-08,0.688703,0.00102947,-1.49589e-06,-3.01428e-08,0.689731,0.00102639,-1.58632e-06,4.30583e-08,0.690756,0.00102334,-1.45715e-06,-2.28814e-08,0.691778,0.00102036,-1.52579e-06,-1.11373e-08,0.692797,0.00101727,-1.5592e-06,6.74305e-08,0.693812,0.00101436,-1.35691e-06,-7.97709e-08,0.694825,0.0010114,-1.59622e-06,7.28391e-08,0.695835,0.00100843,-1.37771e-06,-3.27715e-08,0.696842,0.00100558,-1.47602e-06,-1.35807e-09,0.697846,0.00100262,-1.48009e-06,3.82037e-08,0.698847,0.000999775,-1.36548e-06,-3.22474e-08,0.699846,0.000996948,-1.46223e-06,3.11809e-08,0.700841,0.000994117,-1.36868e-06,-3.28714e-08,0.701834,0.000991281,-1.4673e-06,4.07001e-08,0.702824,0.000988468,-1.3452e-06,-1.07197e-08,0.703811,0.000985746,-1.37736e-06,2.17866e-09,0.704795,0.000982998,-1.37082e-06,2.00521e-09,0.705777,0.000980262,-1.3648e-06,-1.01996e-08,0.706756,0.000977502,-1.3954e-06,3.87931e-08,0.707732,0.000974827,-1.27902e-06,-2.57632e-08,0.708706,0.000972192,-1.35631e-06,4.65513e-09,0.709676,0.000969493,-1.34235e-06,7.14257e-09,0.710645,0.00096683,-1.32092e-06,2.63791e-08,0.71161,0.000964267,-1.24178e-06,-5.30543e-08,0.712573,0.000961625,-1.40095e-06,6.66289e-08,0.713533,0.000959023,-1.20106e-06,-3.46474e-08,0.714491,0.000956517,-1.305e-06,1.23559e-08,0.715446,0.000953944,-1.26793e-06,-1.47763e-08,0.716399,0.000951364,-1.31226e-06,4.67494e-08,0.717349,0.000948879,-1.17201e-06,-5.3012e-08,0.718297,0.000946376,-1.33105e-06,4.60894e-08,0.719242,0.000943852,-1.19278e-06,-1.21366e-08,0.720185,0.00094143,-1.22919e-06,2.45673e-09,0.721125,0.000938979,-1.22182e-06,2.30966e-09,0.722063,0.000936543,-1.21489e-06,-1.16954e-08,0.722998,0.000934078,-1.24998e-06,4.44718e-08,0.723931,0.000931711,-1.11656e-06,-4.69823e-08,0.724861,0.000929337,-1.25751e-06,2.4248e-08,0.725789,0.000926895,-1.18477e-06,9.5949e-09,0.726715,0.000924554,-1.15598e-06,-3.02286e-09,0.727638,0.000922233,-1.16505e-06,2.49649e-09,0.72856,0.00091991,-1.15756e-06,-6.96321e-09,0.729478,0.000917575,-1.17845e-06,2.53564e-08,0.730395,0.000915294,-1.10238e-06,-3.48578e-08,0.731309,0.000912984,-1.20695e-06,5.44704e-08,0.732221,0.000910734,-1.04354e-06,-6.38144e-08,0.73313,0.000908455,-1.23499e-06,8.15781e-08,0.734038,0.00090623,-9.90253e-07,-8.3684e-08,0.734943,0.000903999,-1.2413e-06,7.43441e-08,0.735846,0.000901739,-1.01827e-06,-3.48787e-08,0.736746,0.000899598,-1.12291e-06,5.56596e-09,0.737645,0.000897369,-1.10621e-06,1.26148e-08,0.738541,0.000895194,-1.06837e-06,3.57935e-09,0.739435,0.000893068,-1.05763e-06,-2.69322e-08,0.740327,0.000890872,-1.13842e-06,4.45448e-08,0.741217,0.000888729,-1.00479e-06,-3.20376e-08,0.742105,0.000886623,-1.1009e-06,2.40011e-08,0.74299,0.000884493,-1.0289e-06,-4.36209e-09,0.743874,0.000882422,-1.04199e-06,-6.55268e-09,0.744755,0.000880319,-1.06164e-06,3.05728e-08,0.745634,0.000878287,-9.69926e-07,-5.61338e-08,0.746512,0.000876179,-1.13833e-06,7.4753e-08,0.747387,0.000874127,-9.14068e-07,-6.40644e-08,0.74826,0.000872106,-1.10626e-06,6.22955e-08,0.749131,0.000870081,-9.19375e-07,-6.59083e-08,0.75,0.000868044,-1.1171e-06,8.21284e-08,0.750867,0.000866056,-8.70714e-07,-8.37915e-08,0.751732,0.000864064,-1.12209e-06,7.42237e-08,0.752595,0.000862042,-8.99418e-07,-3.42894e-08,0.753456,0.00086014,-1.00229e-06,3.32955e-09,0.754315,0.000858146,-9.92297e-07,2.09712e-08,0.755173,0.000856224,-9.29384e-07,-2.76096e-08,0.756028,0.000854282,-1.01221e-06,2.98627e-08,0.756881,0.000852348,-9.22625e-07,-3.22365e-08,0.757733,0.000850406,-1.01933e-06,3.94786e-08,0.758582,0.000848485,-9.00898e-07,-6.46833e-09,0.75943,0.000846664,-9.20303e-07,-1.36052e-08,0.760275,0.000844783,-9.61119e-07,1.28447e-09,0.761119,0.000842864,-9.57266e-07,8.4674e-09,0.761961,0.000840975,-9.31864e-07,2.44506e-08,0.762801,0.000839185,-8.58512e-07,-4.6665e-08,0.763639,0.000837328,-9.98507e-07,4.30001e-08,0.764476,0.00083546,-8.69507e-07,-6.12609e-09,0.76531,0.000833703,-8.87885e-07,-1.84959e-08,0.766143,0.000831871,-9.43372e-07,2.05052e-08,0.766974,0.000830046,-8.81857e-07,-3.92026e-09,0.767803,0.000828271,-8.93618e-07,-4.82426e-09,0.768631,0.000826469,-9.0809e-07,2.32172e-08,0.769456,0.000824722,-8.38439e-07,-2.84401e-08,0.77028,0.00082296,-9.23759e-07,3.09386e-08,0.771102,0.000821205,-8.30943e-07,-3.57099e-08,0.771922,0.000819436,-9.38073e-07,5.22963e-08,0.772741,0.000817717,-7.81184e-07,-5.42658e-08,0.773558,0.000815992,-9.43981e-07,4.55579e-08,0.774373,0.000814241,-8.07308e-07,-8.75656e-09,0.775186,0.0008126,-8.33578e-07,-1.05315e-08,0.775998,0.000810901,-8.65172e-07,-8.72188e-09,0.776808,0.000809145,-8.91338e-07,4.54191e-08,0.777616,0.000807498,-7.5508e-07,-5.37454e-08,0.778423,0.000805827,-9.16317e-07,5.03532e-08,0.779228,0.000804145,-7.65257e-07,-2.84584e-08,0.780031,0.000802529,-8.50632e-07,3.87579e-09,0.780833,0.00080084,-8.39005e-07,1.29552e-08,0.781633,0.0007992,-8.00139e-07,3.90804e-09,0.782432,0.000797612,-7.88415e-07,-2.85874e-08,0.783228,0.000795949,-8.74177e-07,5.0837e-08,0.784023,0.000794353,-7.21666e-07,-5.55513e-08,0.784817,0.000792743,-8.8832e-07,5.21587e-08,0.785609,0.000791123,-7.31844e-07,-3.38744e-08,0.786399,0.000789558,-8.33467e-07,2.37342e-08,0.787188,0.000787962,-7.62264e-07,-1.45775e-09,0.787975,0.000786433,-7.66638e-07,-1.79034e-08,0.788761,0.000784846,-8.20348e-07,1.34665e-08,0.789545,0.000783246,-7.79948e-07,2.3642e-08,0.790327,0.000781757,-7.09022e-07,-4.84297e-08,0.791108,0.000780194,-8.54311e-07,5.08674e-08,0.791888,0.000778638,-7.01709e-07,-3.58303e-08,0.792666,0.000777127,-8.092e-07,3.28493e-08,0.793442,0.000775607,-7.10652e-07,-3.59624e-08,0.794217,0.000774078,-8.1854e-07,5.13959e-08,0.79499,0.000772595,-6.64352e-07,-5.04121e-08,0.795762,0.000771115,-8.15588e-07,3.10431e-08,0.796532,0.000769577,-7.22459e-07,-1.41557e-08,0.797301,0.00076809,-7.64926e-07,2.55795e-08,0.798069,0.000766636,-6.88187e-07,-2.85578e-08,0.798835,0.000765174,-7.73861e-07,2.90472e-08,0.799599,0.000763714,-6.86719e-07,-2.80262e-08,0.800362,0.000762256,-7.70798e-07,2.34531e-08,0.801123,0.000760785,-7.00438e-07,-6.18144e-09,0.801884,0.000759366,-7.18983e-07,1.27263e-09,0.802642,0.000757931,-7.15165e-07,1.09101e-09,0.803399,0.000756504,-7.11892e-07,-5.63675e-09,0.804155,0.000755064,-7.28802e-07,2.14559e-08,0.80491,0.00075367,-6.64434e-07,-2.05821e-08,0.805663,0.00075228,-7.26181e-07,1.26812e-09,0.806414,0.000750831,-7.22377e-07,1.55097e-08,0.807164,0.000749433,-6.75848e-07,-3.70216e-09,0.807913,0.00074807,-6.86954e-07,-7.0105e-10,0.80866,0.000746694,-6.89057e-07,6.5063e-09,0.809406,0.000745336,-6.69538e-07,-2.53242e-08,0.810151,0.000743921,-7.45511e-07,3.51858e-08,0.810894,0.000742535,-6.39953e-07,3.79034e-09,0.811636,0.000741267,-6.28582e-07,-5.03471e-08,0.812377,0.000739858,-7.79624e-07,7.83886e-08,0.813116,0.000738534,-5.44458e-07,-8.43935e-08,0.813854,0.000737192,-7.97638e-07,8.03714e-08,0.81459,0.000735838,-5.56524e-07,-5.82784e-08,0.815325,0.00073455,-7.31359e-07,3.35329e-08,0.816059,0.000733188,-6.3076e-07,-1.62486e-08,0.816792,0.000731878,-6.79506e-07,3.14614e-08,0.817523,0.000730613,-5.85122e-07,-4.99925e-08,0.818253,0.000729293,-7.35099e-07,4.92994e-08,0.818982,0.000727971,-5.87201e-07,-2.79959e-08,0.819709,0.000726712,-6.71189e-07,3.07959e-09,0.820435,0.000725379,-6.6195e-07,1.56777e-08,0.82116,0.000724102,-6.14917e-07,-6.18564e-09,0.821883,0.000722854,-6.33474e-07,9.06488e-09,0.822606,0.000721614,-6.06279e-07,-3.00739e-08,0.823327,0.000720311,-6.96501e-07,5.16262e-08,0.824046,0.000719073,-5.41623e-07,-5.72214e-08,0.824765,0.000717818,-7.13287e-07,5.80503e-08,0.825482,0.000716566,-5.39136e-07,-5.57703e-08,0.826198,0.00071532,-7.06447e-07,4.58215e-08,0.826912,0.000714045,-5.68983e-07,-8.30636e-09,0.827626,0.000712882,-5.93902e-07,-1.25961e-08,0.828338,0.000711656,-6.3169e-07,-9.13985e-10,0.829049,0.00071039,-6.34432e-07,1.62519e-08,0.829759,0.00070917,-5.85676e-07,-4.48904e-09,0.830468,0.000707985,-5.99143e-07,1.70418e-09,0.831175,0.000706792,-5.9403e-07,-2.32768e-09,0.831881,0.000705597,-6.01014e-07,7.60648e-09,0.832586,0.000704418,-5.78194e-07,-2.80982e-08,0.83329,0.000703177,-6.62489e-07,4.51817e-08,0.833993,0.000701988,-5.26944e-07,-3.34192e-08,0.834694,0.000700834,-6.27201e-07,2.88904e-08,0.835394,0.000699666,-5.4053e-07,-2.25378e-08,0.836093,0.000698517,-6.08143e-07,1.65589e-09,0.836791,0.000697306,-6.03176e-07,1.59142e-08,0.837488,0.000696147,-5.55433e-07,-5.70801e-09,0.838184,0.000695019,-5.72557e-07,6.91792e-09,0.838878,0.000693895,-5.51803e-07,-2.19637e-08,0.839571,0.000692725,-6.17694e-07,2.13321e-08,0.840263,0.000691554,-5.53698e-07,-3.75996e-09,0.840954,0.000690435,-5.64978e-07,-6.29219e-09,0.841644,0.000689287,-5.83855e-07,2.89287e-08,0.842333,0.000688206,-4.97068e-07,-4.98181e-08,0.843021,0.000687062,-6.46523e-07,5.11344e-08,0.843707,0.000685922,-4.9312e-07,-3.55102e-08,0.844393,0.00068483,-5.9965e-07,3.13019e-08,0.845077,0.000683724,-5.05745e-07,-3.00925e-08,0.84576,0.000682622,-5.96022e-07,2.94636e-08,0.846442,0.000681519,-5.07631e-07,-2.81572e-08,0.847123,0.000680419,-5.92103e-07,2.35606e-08,0.847803,0.000679306,-5.21421e-07,-6.48045e-09,0.848482,0.000678243,-5.40863e-07,2.36124e-09,0.849159,0.000677169,-5.33779e-07,-2.96461e-09,0.849836,0.000676092,-5.42673e-07,9.49728e-09,0.850512,0.000675035,-5.14181e-07,-3.50245e-08,0.851186,0.000673902,-6.19254e-07,7.09959e-08,0.851859,0.000672876,-4.06267e-07,-7.01453e-08,0.852532,0.000671853,-6.16703e-07,3.07714e-08,0.853203,0.000670712,-5.24388e-07,6.66423e-09,0.853873,0.000669684,-5.04396e-07,2.17629e-09,0.854542,0.000668681,-4.97867e-07,-1.53693e-08,0.855211,0.000667639,-5.43975e-07,-3.03752e-10,0.855878,0.000666551,-5.44886e-07,1.65844e-08,0.856544,0.000665511,-4.95133e-07,-6.42907e-09,0.857209,0.000664501,-5.1442e-07,9.13195e-09,0.857873,0.0006635,-4.87024e-07,-3.00987e-08,0.858536,0.000662435,-5.7732e-07,5.16584e-08,0.859198,0.000661436,-4.22345e-07,-5.73255e-08,0.859859,0.000660419,-5.94322e-07,5.84343e-08,0.860518,0.000659406,-4.19019e-07,-5.72022e-08,0.861177,0.000658396,-5.90626e-07,5.11653e-08,0.861835,0.000657368,-4.3713e-07,-2.82495e-08,0.862492,0.000656409,-5.21878e-07,2.22788e-09,0.863148,0.000655372,-5.15195e-07,1.9338e-08,0.863803,0.0006544,-4.5718e-07,-1.99754e-08,0.864457,0.000653425,-5.17107e-07,9.59024e-10,0.86511,0.000652394,-5.1423e-07,1.61393e-08,0.865762,0.000651414,-4.65812e-07,-5.91149e-09,0.866413,0.000650465,-4.83546e-07,7.50665e-09,0.867063,0.00064952,-4.61026e-07,-2.4115e-08,0.867712,0.000648526,-5.33371e-07,2.93486e-08,0.86836,0.000647547,-4.45325e-07,-3.36748e-08,0.869007,0.000646555,-5.4635e-07,4.57461e-08,0.869653,0.0006456,-4.09112e-07,-3.01002e-08,0.870298,0.000644691,-4.99412e-07,1.50501e-08,0.870942,0.000643738,-4.54262e-07,-3.01002e-08,0.871585,0.000642739,-5.44563e-07,4.57461e-08,0.872228,0.000641787,-4.07324e-07,-3.36748e-08,0.872869,0.000640871,-5.08349e-07,2.93486e-08,0.873509,0.000639943,-4.20303e-07,-2.4115e-08,0.874149,0.00063903,-4.92648e-07,7.50655e-09,0.874787,0.000638067,-4.70128e-07,-5.91126e-09,0.875425,0.000637109,-4.87862e-07,1.61385e-08,0.876062,0.000636182,-4.39447e-07,9.61961e-10,0.876697,0.000635306,-4.36561e-07,-1.99863e-08,0.877332,0.000634373,-4.9652e-07,1.93785e-08,0.877966,0.000633438,-4.38384e-07,2.07697e-09,0.878599,0.000632567,-4.32153e-07,-2.76864e-08,0.879231,0.00063162,-5.15212e-07,4.90641e-08,0.879862,0.000630737,-3.6802e-07,-4.93606e-08,0.880493,0.000629852,-5.16102e-07,2.9169e-08,0.881122,0.000628908,-4.28595e-07,-7.71083e-09,0.881751,0.000628027,-4.51727e-07,1.6744e-09,0.882378,0.000627129,-4.46704e-07,1.01317e-09,0.883005,0.000626239,-4.43665e-07,-5.72703e-09,0.883631,0.000625334,-4.60846e-07,2.1895e-08,0.884255,0.000624478,-3.95161e-07,-2.22481e-08,0.88488,0.000623621,-4.61905e-07,7.4928e-09,0.885503,0.00062272,-4.39427e-07,-7.72306e-09,0.886125,0.000621818,-4.62596e-07,2.33995e-08,0.886746,0.000620963,-3.92398e-07,-2.62704e-08,0.887367,0.000620099,-4.71209e-07,2.20775e-08,0.887987,0.000619223,-4.04976e-07,-2.43496e-09,0.888605,0.000618406,-4.12281e-07,-1.23377e-08,0.889223,0.000617544,-4.49294e-07,-7.81876e-09,0.88984,0.000616622,-4.72751e-07,4.36128e-08,0.890457,0.000615807,-3.41912e-07,-4.7423e-08,0.891072,0.000614981,-4.84181e-07,2.68698e-08,0.891687,0.000614093,-4.03572e-07,-4.51384e-10,0.8923,0.000613285,-4.04926e-07,-2.50643e-08,0.892913,0.0006124,-4.80119e-07,4.11038e-08,0.893525,0.000611563,-3.56808e-07,-2.01414e-08,0.894136,0.000610789,-4.17232e-07,-2.01426e-08,0.894747,0.000609894,-4.7766e-07,4.11073e-08,0.895356,0.000609062,-3.54338e-07,-2.50773e-08,0.895965,0.000608278,-4.2957e-07,-4.02954e-10,0.896573,0.000607418,-4.30779e-07,2.66891e-08,0.89718,0.000606636,-3.50711e-07,-4.67489e-08,0.897786,0.000605795,-4.90958e-07,4.10972e-08,0.898391,0.000604936,-3.67666e-07,1.56948e-09,0.898996,0.000604205,-3.62958e-07,-4.73751e-08,0.8996,0.000603337,-5.05083e-07,6.87214e-08,0.900202,0.000602533,-2.98919e-07,-4.86966e-08,0.900805,0.000601789,-4.45009e-07,6.85589e-09,0.901406,0.00060092,-4.24441e-07,2.1273e-08,0.902007,0.000600135,-3.60622e-07,-3.23434e-08,0.902606,0.000599317,-4.57652e-07,4.84959e-08,0.903205,0.000598547,-3.12164e-07,-4.24309e-08,0.903803,0.000597795,-4.39457e-07,2.01844e-09,0.904401,0.000596922,-4.33402e-07,3.43571e-08,0.904997,0.000596159,-3.30331e-07,-2.02374e-08,0.905593,0.000595437,-3.91043e-07,-1.30123e-08,0.906188,0.000594616,-4.3008e-07,1.26819e-08,0.906782,0.000593794,-3.92034e-07,2.18894e-08,0.907376,0.000593076,-3.26366e-07,-4.06349e-08,0.907968,0.000592301,-4.4827e-07,2.1441e-08,0.90856,0.000591469,-3.83947e-07,1.44754e-08,0.909151,0.000590744,-3.40521e-07,-1.97379e-08,0.909742,0.000590004,-3.99735e-07,4.87161e-09,0.910331,0.000589219,-3.8512e-07,2.51532e-10,0.91092,0.00058845,-3.84366e-07,-5.87776e-09,0.911508,0.000587663,-4.01999e-07,2.32595e-08,0.912096,0.000586929,-3.3222e-07,-2.75554e-08,0.912682,0.000586182,-4.14887e-07,2.73573e-08,0.913268,0.000585434,-3.32815e-07,-2.22692e-08,0.913853,0.000584702,-3.99622e-07,2.11486e-09,0.914437,0.000583909,-3.93278e-07,1.38098e-08,0.915021,0.000583164,-3.51848e-07,2.25042e-09,0.915604,0.000582467,-3.45097e-07,-2.28115e-08,0.916186,0.000581708,-4.13531e-07,2.93911e-08,0.916767,0.000580969,-3.25358e-07,-3.51481e-08,0.917348,0.000580213,-4.30803e-07,5.15967e-08,0.917928,0.000579506,-2.76012e-07,-5.20296e-08,0.918507,0.000578798,-4.32101e-07,3.73124e-08,0.919085,0.000578046,-3.20164e-07,-3.76154e-08,0.919663,0.000577293,-4.3301e-07,5.35447e-08,0.92024,0.000576587,-2.72376e-07,-5.7354e-08,0.920816,0.000575871,-4.44438e-07,5.66621e-08,0.921391,0.000575152,-2.74452e-07,-5.00851e-08,0.921966,0.000574453,-4.24707e-07,2.4469e-08,0.92254,0.000573677,-3.513e-07,1.18138e-08,0.923114,0.000573009,-3.15859e-07,-1.21195e-08,0.923686,0.000572341,-3.52217e-07,-2.29403e-08,0.924258,0.000571568,-4.21038e-07,4.4276e-08,0.924829,0.000570859,-2.8821e-07,-3.49546e-08,0.9254,0.000570178,-3.93074e-07,3.59377e-08,0.92597,0.000569499,-2.85261e-07,-4.91915e-08,0.926539,0.000568781,-4.32835e-07,4.16189e-08,0.927107,0.00056804,-3.07979e-07,1.92523e-09,0.927675,0.00056743,-3.02203e-07,-4.93198e-08,0.928242,0.000566678,-4.50162e-07,7.61447e-08,0.928809,0.000566006,-2.21728e-07,-7.6445e-08,0.929374,0.000565333,-4.51063e-07,5.08216e-08,0.929939,0.000564583,-2.98599e-07,-7.63212e-09,0.930503,0.000563963,-3.21495e-07,-2.02931e-08,0.931067,0.000563259,-3.82374e-07,2.92001e-08,0.93163,0.000562582,-2.94774e-07,-3.69025e-08,0.932192,0.000561882,-4.05482e-07,5.88053e-08,0.932754,0.000561247,-2.29066e-07,-7.91094e-08,0.933315,0.000560552,-4.66394e-07,7.88184e-08,0.933875,0.000559856,-2.29939e-07,-5.73501e-08,0.934434,0.000559224,-4.01989e-07,3.13727e-08,0.934993,0.000558514,-3.07871e-07,-8.53611e-09,0.935551,0.000557873,-3.33479e-07,2.77175e-09,0.936109,0.000557214,-3.25164e-07,-2.55091e-09,0.936666,0.000556556,-3.32817e-07,7.43188e-09,0.937222,0.000555913,-3.10521e-07,-2.71766e-08,0.937778,0.00055521,-3.92051e-07,4.167e-08,0.938333,0.000554551,-2.67041e-07,-2.02941e-08,0.938887,0.000553956,-3.27923e-07,-2.00984e-08,0.93944,0.00055324,-3.88218e-07,4.10828e-08,0.939993,0.000552587,-2.6497e-07,-2.50237e-08,0.940546,0.000551982,-3.40041e-07,-5.92583e-10,0.941097,0.0005513,-3.41819e-07,2.7394e-08,0.941648,0.000550698,-2.59637e-07,-4.93788e-08,0.942199,0.000550031,-4.07773e-07,5.09119e-08,0.942748,0.000549368,-2.55038e-07,-3.50595e-08,0.943297,0.000548753,-3.60216e-07,2.97214e-08,0.943846,0.000548122,-2.71052e-07,-2.42215e-08,0.944394,0.000547507,-3.43716e-07,7.55985e-09,0.944941,0.000546842,-3.21037e-07,-6.01796e-09,0.945487,0.000546182,-3.3909e-07,1.65119e-08,0.946033,0.000545553,-2.89555e-07,-4.2498e-10,0.946578,0.000544973,-2.9083e-07,-1.4812e-08,0.947123,0.000544347,-3.35266e-07,6.83068e-11,0.947667,0.000543676,-3.35061e-07,1.45388e-08,0.94821,0.00054305,-2.91444e-07,1.38123e-09,0.948753,0.000542471,-2.87301e-07,-2.00637e-08,0.949295,0.000541836,-3.47492e-07,1.92688e-08,0.949837,0.000541199,-2.89685e-07,2.59298e-09,0.950378,0.000540628,-2.81906e-07,-2.96407e-08,0.950918,0.000539975,-3.70829e-07,5.63652e-08,0.951458,0.000539402,-2.01733e-07,-7.66107e-08,0.951997,0.000538769,-4.31565e-07,7.12638e-08,0.952535,0.00053812,-2.17774e-07,-2.96305e-08,0.953073,0.000537595,-3.06665e-07,-1.23464e-08,0.95361,0.000536945,-3.43704e-07,1.94114e-08,0.954147,0.000536316,-2.8547e-07,-5.69451e-09,0.954683,0.000535728,-3.02554e-07,3.36666e-09,0.955219,0.000535133,-2.92454e-07,-7.77208e-09,0.955753,0.000534525,-3.1577e-07,2.77216e-08,0.956288,0.000533976,-2.32605e-07,-4.35097e-08,0.956821,0.00053338,-3.63134e-07,2.7108e-08,0.957354,0.000532735,-2.8181e-07,-5.31772e-09,0.957887,0.000532156,-2.97764e-07,-5.83718e-09,0.958419,0.000531543,-3.15275e-07,2.86664e-08,0.95895,0.000530998,-2.29276e-07,-4.9224e-08,0.959481,0.000530392,-3.76948e-07,4.90201e-08,0.960011,0.000529785,-2.29887e-07,-2.76471e-08,0.96054,0.000529243,-3.12829e-07,1.96385e-09,0.961069,0.000528623,-3.06937e-07,1.97917e-08,0.961598,0.000528068,-2.47562e-07,-2.15261e-08,0.962125,0.000527508,-3.1214e-07,6.70795e-09,0.962653,0.000526904,-2.92016e-07,-5.30573e-09,0.963179,0.000526304,-3.07934e-07,1.4515e-08,0.963705,0.000525732,-2.64389e-07,6.85048e-09,0.964231,0.000525224,-2.43837e-07,-4.19169e-08,0.964756,0.00052461,-3.69588e-07,4.1608e-08,0.96528,0.000523996,-2.44764e-07,-5.30598e-09,0.965804,0.000523491,-2.60682e-07,-2.03841e-08,0.966327,0.000522908,-3.21834e-07,2.72378e-08,0.966849,0.000522346,-2.40121e-07,-2.89625e-08,0.967371,0.000521779,-3.27008e-07,2.90075e-08,0.967893,0.000521212,-2.39986e-07,-2.74629e-08,0.968414,0.00052065,-3.22374e-07,2.12396e-08,0.968934,0.000520069,-2.58656e-07,2.10922e-09,0.969454,0.000519558,-2.52328e-07,-2.96765e-08,0.969973,0.000518964,-3.41357e-07,5.6992e-08,0.970492,0.000518452,-1.70382e-07,-7.90821e-08,0.97101,0.000517874,-4.07628e-07,8.05224e-08,0.971528,0.000517301,-1.66061e-07,-6.41937e-08,0.972045,0.000516776,-3.58642e-07,5.70429e-08,0.972561,0.00051623,-1.87513e-07,-4.47686e-08,0.973077,0.00051572,-3.21819e-07,2.82237e-09,0.973593,0.000515085,-3.13352e-07,3.34792e-08,0.974108,0.000514559,-2.12914e-07,-1.75298e-08,0.974622,0.000514081,-2.65503e-07,-2.29648e-08,0.975136,0.000513481,-3.34398e-07,4.97843e-08,0.975649,0.000512961,-1.85045e-07,-5.6963e-08,0.976162,0.00051242,-3.55934e-07,5.88585e-08,0.976674,0.000511885,-1.79359e-07,-5.92616e-08,0.977185,0.000511348,-3.57143e-07,5.89785e-08,0.977696,0.000510811,-1.80208e-07,-5.74433e-08,0.978207,0.000510278,-3.52538e-07,5.15854e-08,0.978717,0.000509728,-1.97781e-07,-2.9689e-08,0.979226,0.000509243,-2.86848e-07,7.56591e-09,0.979735,0.000508692,-2.64151e-07,-5.74649e-10,0.980244,0.000508162,-2.65875e-07,-5.26732e-09,0.980752,0.000507615,-2.81677e-07,2.16439e-08,0.981259,0.000507116,-2.16745e-07,-2.17037e-08,0.981766,0.000506618,-2.81856e-07,5.56636e-09,0.982272,0.000506071,-2.65157e-07,-5.61689e-10,0.982778,0.000505539,-2.66842e-07,-3.31963e-09,0.983283,0.000504995,-2.76801e-07,1.38402e-08,0.983788,0.000504483,-2.3528e-07,7.56339e-09,0.984292,0.000504035,-2.1259e-07,-4.40938e-08,0.984796,0.000503478,-3.44871e-07,4.96026e-08,0.985299,0.000502937,-1.96064e-07,-3.51071e-08,0.985802,0.000502439,-3.01385e-07,3.12212e-08,0.986304,0.00050193,-2.07721e-07,-3.0173e-08,0.986806,0.000501424,-2.9824e-07,2.9866e-08,0.987307,0.000500917,-2.08642e-07,-2.96865e-08,0.987808,0.000500411,-2.97702e-07,2.92753e-08,0.988308,0.000499903,-2.09876e-07,-2.78101e-08,0.988807,0.0004994,-2.93306e-07,2.23604e-08,0.989307,0.000498881,-2.26225e-07,-2.02681e-09,0.989805,0.000498422,-2.32305e-07,-1.42531e-08,0.990303,0.000497915,-2.75065e-07,-5.65232e-10,0.990801,0.000497363,-2.76761e-07,1.65141e-08,0.991298,0.000496859,-2.27218e-07,-5.88639e-09,0.991795,0.000496387,-2.44878e-07,7.0315e-09,0.992291,0.000495918,-2.23783e-07,-2.22396e-08,0.992787,0.000495404,-2.90502e-07,2.23224e-08,0.993282,0.00049489,-2.23535e-07,-7.44543e-09,0.993776,0.000494421,-2.45871e-07,7.45924e-09,0.994271,0.000493951,-2.23493e-07,-2.23915e-08,0.994764,0.000493437,-2.90668e-07,2.25021e-08,0.995257,0.000492923,-2.23161e-07,-8.01218e-09,0.99575,0.000492453,-2.47198e-07,9.54669e-09,0.996242,0.000491987,-2.18558e-07,-3.01746e-08,0.996734,0.000491459,-3.09082e-07,5.1547e-08,0.997225,0.000490996,-1.54441e-07,-5.68039e-08,0.997716,0.000490517,-3.24853e-07,5.64594e-08,0.998206,0.000490036,-1.55474e-07,-4.98245e-08,0.998696,0.000489576,-3.04948e-07,2.36292e-08,0.999186,0.000489037,-2.3406e-07,1.49121e-08,0.999674,0.000488613,-1.89324e-07,-2.3673e-08,1.00016,0.000488164,-2.60343e-07,2.01754e-08,1.00065,0.000487704,-1.99816e-07,-5.70288e-08,1.00114,0.000487133,-3.70903e-07,8.87303e-08,1.00162,0.000486657,-1.04712e-07,-5.94737e-08,1.00211,0.000486269,-2.83133e-07,2.99553e-08,1.0026,0.000485793,-1.93267e-07,-6.03474e-08,1.00308,0.000485225,-3.74309e-07,9.2225e-08,1.00357,0.000484754,-9.76345e-08,-7.0134e-08,1.00405,0.000484348,-3.08036e-07,6.91016e-08,1.00454,0.000483939,-1.00731e-07,-8.70633e-08,1.00502,0.000483476,-3.61921e-07,4.07328e-08,1.0055,0.000482875,-2.39723e-07,4.33413e-08,1.00599,0.000482525,-1.09699e-07,-9.48886e-08,1.00647,0.000482021,-3.94365e-07,9.77947e-08,1.00695,0.000481526,-1.00981e-07,-5.78713e-08,1.00743,0.00048115,-2.74595e-07,1.44814e-08,1.00791,0.000480645,-2.31151e-07,-5.42665e-11,1.00839,0.000480182,-2.31314e-07,-1.42643e-08,1.00887,0.000479677,-2.74106e-07,5.71115e-08,1.00935,0.0004793,-1.02772e-07,-9.49724e-08,1.00983,0.000478809,-3.87689e-07,8.43596e-08,1.01031,0.000478287,-1.3461e-07,-4.04755e-09,1.01079,0.000478006,-1.46753e-07,-6.81694e-08,1.01127,0.000477508,-3.51261e-07,3.83067e-08,1.01174,0.00047692,-2.36341e-07,3.41521e-08,1.01222,0.00047655,-1.33885e-07,-5.57058e-08,1.0127,0.000476115,-3.01002e-07,6.94616e-08,1.01317,0.000475721,-9.26174e-08,-1.02931e-07,1.01365,0.000475227,-4.01412e-07,1.03846e-07,1.01412,0.000474736,-8.98751e-08,-7.40321e-08,1.0146,0.000474334,-3.11971e-07,7.30735e-08,1.01507,0.00047393,-9.27508e-08,-9.90527e-08,1.01554,0.000473447,-3.89909e-07,8.47188e-08,1.01602,0.000472921,-1.35753e-07,-1.40381e-09,1.01649,0.000472645,-1.39964e-07,-7.91035e-08,1.01696,0.000472128,-3.77275e-07,7.93993e-08,1.01744,0.000471612,-1.39077e-07,-7.52607e-11,1.01791,0.000471334,-1.39302e-07,-7.90983e-08,1.01838,0.000470818,-3.76597e-07,7.80499e-08,1.01885,0.000470299,-1.42448e-07,5.31733e-09,1.01932,0.00047003,-1.26496e-07,-9.93193e-08,1.01979,0.000469479,-4.24453e-07,1.53541e-07,1.02026,0.00046909,3.617e-08,-1.57217e-07,1.02073,0.000468691,-4.35482e-07,1.177e-07,1.02119,0.000468173,-8.23808e-08,-7.51659e-08,1.02166,0.000467783,-3.07878e-07,6.37538e-08,1.02213,0.000467358,-1.16617e-07,-6.064e-08,1.0226,0.000466943,-2.98537e-07,5.9597e-08,1.02306,0.000466525,-1.19746e-07,-5.85386e-08,1.02353,0.00046611,-2.95362e-07,5.53482e-08,1.024,0.000465685,-1.29317e-07,-4.36449e-08,1.02446,0.000465296,-2.60252e-07,2.20268e-11,1.02493,0.000464775,-2.60186e-07,4.35568e-08,1.02539,0.000464386,-1.29516e-07,-5.50398e-08,1.02586,0.000463961,-2.94635e-07,5.73932e-08,1.02632,0.000463544,-1.22456e-07,-5.53236e-08,1.02678,0.000463133,-2.88426e-07,4.46921e-08,1.02725,0.000462691,-1.5435e-07,-4.23534e-09,1.02771,0.000462369,-1.67056e-07,-2.77507e-08,1.02817,0.000461952,-2.50308e-07,-3.97101e-09,1.02863,0.000461439,-2.62221e-07,4.36348e-08,1.02909,0.000461046,-1.31317e-07,-5.13589e-08,1.02955,0.000460629,-2.85394e-07,4.25913e-08,1.03001,0.000460186,-1.5762e-07,2.0285e-10,1.03047,0.000459871,-1.57011e-07,-4.34027e-08,1.03093,0.000459427,-2.87219e-07,5.41987e-08,1.03139,0.000459015,-1.24623e-07,-5.4183e-08,1.03185,0.000458604,-2.87172e-07,4.33239e-08,1.03231,0.000458159,-1.572e-07,9.65817e-11,1.03277,0.000457845,-1.56911e-07,-4.37103e-08,1.03323,0.0004574,-2.88041e-07,5.55351e-08,1.03368,0.000456991,-1.21436e-07,-5.9221e-08,1.03414,0.00045657,-2.99099e-07,6.21394e-08,1.0346,0.000456158,-1.1268e-07,-7.01275e-08,1.03505,0.000455723,-3.23063e-07,9.91614e-08,1.03551,0.000455374,-2.55788e-08,-8.80996e-08,1.03596,0.000455058,-2.89878e-07,1.48184e-08,1.03642,0.000454523,-2.45422e-07,2.88258e-08,1.03687,0.000454119,-1.58945e-07,-1.09125e-08,1.03733,0.000453768,-1.91682e-07,1.48241e-08,1.03778,0.000453429,-1.4721e-07,-4.83838e-08,1.03823,0.00045299,-2.92361e-07,5.95019e-08,1.03869,0.000452584,-1.13856e-07,-7.04146e-08,1.03914,0.000452145,-3.25099e-07,1.02947e-07,1.03959,0.000451803,-1.62583e-08,-1.02955e-07,1.04004,0.000451462,-3.25123e-07,7.04544e-08,1.04049,0.000451023,-1.1376e-07,-5.96534e-08,1.04094,0.000450616,-2.9272e-07,4.89499e-08,1.04139,0.000450178,-1.45871e-07,-1.69369e-08,1.04184,0.000449835,-1.96681e-07,1.87977e-08,1.04229,0.000449498,-1.40288e-07,-5.82539e-08,1.04274,0.000449043,-3.1505e-07,9.50087e-08,1.04319,0.000448698,-3.00238e-08,-8.33623e-08,1.04364,0.000448388,-2.80111e-07,2.20363e-11,1.04409,0.000447828,-2.80045e-07,8.32742e-08,1.04454,0.000447517,-3.02221e-08,-9.47002e-08,1.04498,0.000447173,-3.14323e-07,5.7108e-08,1.04543,0.000446716,-1.42999e-07,-1.45225e-08,1.04588,0.000446386,-1.86566e-07,9.82022e-10,1.04632,0.000446016,-1.8362e-07,1.05944e-08,1.04677,0.00044568,-1.51837e-07,-4.33597e-08,1.04721,0.000445247,-2.81916e-07,4.36352e-08,1.04766,0.000444814,-1.51011e-07,-1.19717e-08,1.0481,0.000444476,-1.86926e-07,4.25158e-09,1.04855,0.000444115,-1.74171e-07,-5.03461e-09,1.04899,0.000443751,-1.89275e-07,1.58868e-08,1.04944,0.00044342,-1.41614e-07,-5.85127e-08,1.04988,0.000442961,-3.17152e-07,9.89548e-08,1.05032,0.000442624,-2.0288e-08,-9.88878e-08,1.05076,0.000442287,-3.16951e-07,5.81779e-08,1.05121,0.000441827,-1.42418e-07,-1.46144e-08,1.05165,0.000441499,-1.86261e-07,2.79892e-10,1.05209,0.000441127,-1.85421e-07,1.34949e-08,1.05253,0.000440797,-1.44937e-07,-5.42594e-08,1.05297,0.000440344,-3.07715e-07,8.43335e-08,1.05341,0.000439982,-5.47146e-08,-4.46558e-08,1.05385,0.000439738,-1.88682e-07,-2.49193e-08,1.05429,0.000439286,-2.6344e-07,2.5124e-08,1.05473,0.000438835,-1.88068e-07,4.36328e-08,1.05517,0.000438589,-5.71699e-08,-8.04459e-08,1.05561,0.000438234,-2.98508e-07,3.97324e-08,1.05605,0.000437756,-1.79311e-07,4.07258e-08,1.05648,0.000437519,-5.71332e-08,-8.34263e-08,1.05692,0.000437155,-3.07412e-07,5.45608e-08,1.05736,0.000436704,-1.4373e-07,-1.56078e-08,1.05779,0.000436369,-1.90553e-07,7.87043e-09,1.05823,0.000436012,-1.66942e-07,-1.58739e-08,1.05867,0.00043563,-2.14563e-07,5.56251e-08,1.0591,0.000435368,-4.76881e-08,-8.74172e-08,1.05954,0.000435011,-3.0994e-07,5.56251e-08,1.05997,0.000434558,-1.43064e-07,-1.58739e-08,1.06041,0.000434224,-1.90686e-07,7.87042e-09,1.06084,0.000433866,-1.67075e-07,-1.56078e-08,1.06127,0.000433485,-2.13898e-07,5.45609e-08,1.06171,0.000433221,-5.02157e-08,-8.34263e-08,1.06214,0.00043287,-3.00495e-07,4.07258e-08,1.06257,0.000432391,-1.78317e-07,3.97325e-08,1.063,0.000432154,-5.91198e-08,-8.04464e-08,1.06344,0.000431794,-3.00459e-07,4.36347e-08,1.06387,0.000431324,-1.69555e-07,2.5117e-08,1.0643,0.000431061,-9.42041e-08,-2.48934e-08,1.06473,0.000430798,-1.68884e-07,-4.47527e-08,1.06516,0.000430326,-3.03142e-07,8.46951e-08,1.06559,0.000429973,-4.90573e-08,-5.56089e-08,1.06602,0.000429708,-2.15884e-07,1.85314e-08,1.06645,0.000429332,-1.6029e-07,-1.85166e-08,1.06688,0.000428956,-2.1584e-07,5.5535e-08,1.06731,0.000428691,-4.92347e-08,-8.44142e-08,1.06774,0.000428339,-3.02477e-07,4.37032e-08,1.06816,0.000427865,-1.71368e-07,2.88107e-08,1.06859,0.000427609,-8.49356e-08,-3.97367e-08,1.06902,0.00042732,-2.04146e-07,1.09267e-08,1.06945,0.000426945,-1.71365e-07,-3.97023e-09,1.06987,0.00042659,-1.83276e-07,4.9542e-09,1.0703,0.000426238,-1.68414e-07,-1.58466e-08,1.07073,0.000425854,-2.15953e-07,5.84321e-08,1.07115,0.000425597,-4.0657e-08,-9.86725e-08,1.07158,0.00042522,-3.36674e-07,9.78392e-08,1.072,0.00042484,-4.31568e-08,-5.42658e-08,1.07243,0.000424591,-2.05954e-07,1.45377e-11,1.07285,0.000424179,-2.0591e-07,5.42076e-08,1.07328,0.00042393,-4.32877e-08,-9.76357e-08,1.0737,0.00042355,-3.36195e-07,9.79165e-08,1.07412,0.000423172,-4.24451e-08,-5.56118e-08,1.07455,0.00042292,-2.09281e-07,5.32143e-09,1.07497,0.000422518,-1.93316e-07,3.43261e-08,1.07539,0.000422234,-9.0338e-08,-2.34165e-08,1.07581,0.000421983,-1.60588e-07,-5.98692e-08,1.07623,0.000421482,-3.40195e-07,1.43684e-07,1.07666,0.000421233,9.08574e-08,-1.5724e-07,1.07708,0.000420943,-3.80862e-07,1.27647e-07,1.0775,0.000420564,2.0791e-09,-1.1493e-07,1.07792,0.000420223,-3.4271e-07,9.36534e-08,1.07834,0.000419819,-6.17499e-08,-2.12653e-08,1.07876,0.000419632,-1.25546e-07,-8.59219e-09,1.07918,0.000419355,-1.51322e-07,-6.35752e-08,1.0796,0.000418861,-3.42048e-07,1.43684e-07,1.08002,0.000418608,8.90034e-08,-1.53532e-07,1.08043,0.000418326,-3.71593e-07,1.12817e-07,1.08085,0.000417921,-3.31414e-08,-5.93184e-08,1.08127,0.000417677,-2.11097e-07,5.24697e-09,1.08169,0.00041727,-1.95356e-07,3.83305e-08,1.0821,0.000416995,-8.03642e-08,-3.93597e-08,1.08252,0.000416716,-1.98443e-07,-1.0094e-10,1.08294,0.000416319,-1.98746e-07,3.97635e-08,1.08335,0.00041604,-7.94557e-08,-3.97437e-08,1.08377,0.000415762,-1.98687e-07,1.94215e-12,1.08419,0.000415365,-1.98681e-07,3.97359e-08,1.0846,0.000415087,-7.94732e-08,-3.97362e-08,1.08502,0.000414809,-1.98682e-07,-4.31063e-13,1.08543,0.000414411,-1.98683e-07,3.97379e-08,1.08584,0.000414133,-7.94694e-08,-3.97418e-08,1.08626,0.000413855,-1.98695e-07,2.00563e-11,1.08667,0.000413458,-1.98635e-07,3.96616e-08,1.08709,0.000413179,-7.965e-08,-3.9457e-08,1.0875,0.000412902,-1.98021e-07,-1.04281e-09,1.08791,0.000412502,-2.01149e-07,4.36282e-08,1.08832,0.000412231,-7.02648e-08,-5.42608e-08,1.08874,0.000411928,-2.33047e-07,5.42057e-08,1.08915,0.000411624,-7.04301e-08,-4.33527e-08,1.08956,0.000411353,-2.00488e-07,-4.07378e-12,1.08997,0.000410952,-2.005e-07,4.3369e-08,1.09038,0.000410681,-7.03934e-08,-5.42627e-08,1.09079,0.000410378,-2.33182e-07,5.44726e-08,1.0912,0.000410075,-6.97637e-08,-4.44186e-08,1.09161,0.000409802,-2.03019e-07,3.99235e-09,1.09202,0.000409408,-1.91042e-07,2.84491e-08,1.09243,0.000409111,-1.05695e-07,1.42043e-09,1.09284,0.000408904,-1.01434e-07,-3.41308e-08,1.09325,0.000408599,-2.03826e-07,1.58937e-08,1.09366,0.000408239,-1.56145e-07,-2.94438e-08,1.09406,0.000407838,-2.44476e-07,1.01881e-07,1.09447,0.000407655,6.11676e-08,-1.39663e-07,1.09488,0.000407358,-3.57822e-07,9.91432e-08,1.09529,0.00040694,-6.03921e-08,-1.84912e-08,1.09569,0.000406764,-1.15866e-07,-2.51785e-08,1.0961,0.000406457,-1.91401e-07,-4.03115e-12,1.09651,0.000406074,-1.91413e-07,2.51947e-08,1.09691,0.000405767,-1.15829e-07,1.84346e-08,1.09732,0.00040559,-6.05254e-08,-9.89332e-08,1.09772,0.000405172,-3.57325e-07,1.3888e-07,1.09813,0.000404874,5.93136e-08,-9.8957e-08,1.09853,0.000404696,-2.37557e-07,1.853e-08,1.09894,0.000404277,-1.81968e-07,2.48372e-08,1.09934,0.000403987,-1.07456e-07,1.33047e-09,1.09975,0.000403776,-1.03465e-07,-3.01591e-08,1.10015,0.000403479,-1.93942e-07,9.66054e-11,1.10055,0.000403091,-1.93652e-07,2.97727e-08,1.10096,0.000402793,-1.04334e-07,2.19273e-11,1.10136,0.000402585,-1.04268e-07,-2.98604e-08,1.10176,0.000402287,-1.93849e-07,2.10325e-10,1.10216,0.0004019,-1.93218e-07,2.90191e-08,1.10256,0.0004016,-1.06161e-07,2.92264e-09,1.10297,0.000401397,-9.73931e-08,-4.07096e-08,1.10337,0.00040108,-2.19522e-07,4.07067e-08,1.10377,0.000400763,-9.7402e-08,-2.90783e-09,1.10417,0.000400559,-1.06126e-07,-2.90754e-08,1.10457,0.00040026,-1.93352e-07,9.00021e-14,1.10497,0.000399873,-1.93351e-07,2.9075e-08,1.10537,0.000399574,-1.06126e-07,2.90902e-09,1.10577,0.00039937,-9.73992e-08,-4.07111e-08,1.10617,0.000399053,-2.19533e-07,4.07262e-08,1.10657,0.000398736,-9.73541e-08,-2.98424e-09,1.10697,0.000398533,-1.06307e-07,-2.87892e-08,1.10736,0.000398234,-1.92674e-07,-1.06824e-09,1.10776,0.000397845,-1.95879e-07,3.30622e-08,1.10816,0.000397552,-9.66926e-08,-1.19712e-08,1.10856,0.000397323,-1.32606e-07,1.48225e-08,1.10895,0.000397102,-8.81387e-08,-4.73187e-08,1.10935,0.000396784,-2.30095e-07,5.52429e-08,1.10975,0.00039649,-6.4366e-08,-5.44437e-08,1.11014,0.000396198,-2.27697e-07,4.33226e-08,1.11054,0.000395872,-9.77293e-08,3.62656e-10,1.11094,0.000395678,-9.66414e-08,-4.47732e-08,1.11133,0.00039535,-2.30961e-07,5.95208e-08,1.11173,0.000395067,-5.23985e-08,-7.41008e-08,1.11212,0.00039474,-2.74701e-07,1.17673e-07,1.11252,0.000394543,7.83181e-08,-1.58172e-07,1.11291,0.000394225,-3.96199e-07,1.57389e-07,1.1133,0.000393905,7.59679e-08,-1.13756e-07,1.1137,0.000393716,-2.653e-07,5.92165e-08,1.11409,0.000393363,-8.76507e-08,-3.90074e-09,1.11449,0.000393176,-9.93529e-08,-4.36136e-08,1.11488,0.000392846,-2.30194e-07,5.91457e-08,1.11527,0.000392563,-5.27564e-08,-7.376e-08,1.11566,0.000392237,-2.74037e-07,1.16685e-07,1.11606,0.000392039,7.60189e-08,-1.54562e-07,1.11645,0.000391727,-3.87667e-07,1.43935e-07,1.11684,0.000391384,4.4137e-08,-6.35487e-08,1.11723,0.000391281,-1.46509e-07,-8.94896e-09,1.11762,0.000390961,-1.73356e-07,-1.98647e-08,1.11801,0.000390555,-2.3295e-07,8.8408e-08,1.1184,0.000390354,3.22736e-08,-9.53486e-08,1.11879,0.000390133,-2.53772e-07,5.45677e-08,1.11918,0.000389789,-9.0069e-08,-3.71296e-09,1.11957,0.000389598,-1.01208e-07,-3.97159e-08,1.11996,0.000389276,-2.20355e-07,4.33671e-08,1.12035,0.000388966,-9.02542e-08,-1.45431e-08,1.12074,0.000388741,-1.33883e-07,1.48052e-08,1.12113,0.000388518,-8.94678e-08,-4.46778e-08,1.12152,0.000388205,-2.23501e-07,4.46966e-08,1.12191,0.000387892,-8.94114e-08,-1.48992e-08,1.12229,0.000387669,-1.34109e-07,1.49003e-08,1.12268,0.000387445,-8.94082e-08,-4.47019e-08,1.12307,0.000387132,-2.23514e-07,4.4698e-08,1.12345,0.000386819,-8.942e-08,-1.48806e-08,1.12384,0.000386596,-1.34062e-07,1.48245e-08,1.12423,0.000386372,-8.95885e-08,-4.44172e-08,1.12461,0.00038606,-2.2284e-07,4.36351e-08,1.125,0.000385745,-9.19348e-08,-1.09139e-08,1.12539,0.000385528,-1.24677e-07,2.05584e-11,1.12577,0.000385279,-1.24615e-07,1.08317e-08,1.12616,0.000385062,-9.21198e-08,-4.33473e-08,1.12654,0.000384748,-2.22162e-07,4.33481e-08,1.12693,0.000384434,-9.21174e-08,-1.08356e-08,1.12731,0.000384217,-1.24624e-07,-5.50907e-12,1.12769,0.000383968,-1.24641e-07,1.08577e-08,1.12808,0.000383751,-9.20679e-08,-4.34252e-08,1.12846,0.000383437,-2.22343e-07,4.36337e-08,1.12884,0.000383123,-9.14422e-08,-1.19005e-08,1.12923,0.000382904,-1.27144e-07,3.96813e-09,1.12961,0.000382662,-1.15239e-07,-3.97207e-09,1.12999,0.000382419,-1.27155e-07,1.19201e-08,1.13038,0.000382201,-9.1395e-08,-4.37085e-08,1.13076,0.000381887,-2.2252e-07,4.37046e-08,1.13114,0.000381573,-9.14068e-08,-1.19005e-08,1.13152,0.000381355,-1.27108e-07,3.89734e-09,1.1319,0.000381112,-1.15416e-07,-3.68887e-09,1.13228,0.00038087,-1.26483e-07,1.08582e-08,1.13266,0.00038065,-9.39083e-08,-3.97438e-08,1.13304,0.000380343,-2.1314e-07,2.89076e-08,1.13342,0.000380003,-1.26417e-07,4.33225e-08,1.1338,0.00037988,3.55072e-09,-8.29883e-08,1.13418,0.000379638,-2.45414e-07,5.0212e-08,1.13456,0.000379298,-9.47781e-08,1.34964e-09,1.13494,0.000379113,-9.07292e-08,-5.56105e-08,1.13532,0.000378764,-2.57561e-07,1.01883e-07,1.1357,0.000378555,4.80889e-08,-1.13504e-07,1.13608,0.000378311,-2.92423e-07,1.13713e-07,1.13646,0.000378067,4.87176e-08,-1.02931e-07,1.13683,0.000377856,-2.60076e-07,5.95923e-08,1.13721,0.000377514,-8.12988e-08,-1.62288e-08,1.13759,0.000377303,-1.29985e-07,5.32278e-09,1.13797,0.000377059,-1.14017e-07,-5.06237e-09,1.13834,0.000376816,-1.29204e-07,1.49267e-08,1.13872,0.000376602,-8.44237e-08,-5.46444e-08,1.1391,0.000376269,-2.48357e-07,8.44417e-08,1.13947,0.000376026,4.96815e-09,-4.47039e-08,1.13985,0.000375902,-1.29143e-07,-2.48355e-08,1.14023,0.000375569,-2.0365e-07,2.48368e-08,1.1406,0.000375236,-1.2914e-07,4.46977e-08,1.14098,0.000375112,4.95341e-09,-8.44184e-08,1.14135,0.000374869,-2.48302e-07,5.45572e-08,1.14173,0.000374536,-8.463e-08,-1.46013e-08,1.1421,0.000374323,-1.28434e-07,3.8478e-09,1.14247,0.000374077,-1.1689e-07,-7.89941e-10,1.14285,0.000373841,-1.1926e-07,-6.88042e-10,1.14322,0.0003736,-1.21324e-07,3.54213e-09,1.1436,0.000373368,-1.10698e-07,-1.34805e-08,1.14397,0.000373107,-1.51139e-07,5.03798e-08,1.14434,0.000372767,0.,0.}; + + template + __device__ __forceinline__ void RGB2LuvConvert_f(const T& src, D& dst) + { + const float _d = 1.f / (0.950456f + 15 + 1.088754f * 3); + const float _un = 13 * (4 * 0.950456f * _d); + const float _vn = 13 * (9 * _d); + + float B = blueIdx == 0 ? src.x : src.z; + float G = src.y; + float R = blueIdx == 0 ? src.z : src.x; + + if (srgb) + { + B = splineInterpolate(B * GAMMA_TAB_SIZE, c_sRGBGammaTab, GAMMA_TAB_SIZE); + G = splineInterpolate(G * GAMMA_TAB_SIZE, c_sRGBGammaTab, GAMMA_TAB_SIZE); + R = splineInterpolate(R * GAMMA_TAB_SIZE, c_sRGBGammaTab, GAMMA_TAB_SIZE); + } + + float X = R * 0.412453f + G * 0.357580f + B * 0.180423f; + float Y = R * 0.212671f + G * 0.715160f + B * 0.072169f; + float Z = R * 0.019334f + G * 0.119193f + B * 0.950227f; + + float L = splineInterpolate(Y * (LAB_CBRT_TAB_SIZE / 1.5f), c_LabCbrtTab, LAB_CBRT_TAB_SIZE); + L = 116.f * L - 16.f; + + const float d = (4 * 13) / ::fmaxf(X + 15 * Y + 3 * Z, numeric_limits::epsilon()); + float u = L * (X * d - _un); + float v = L * ((9 * 0.25f) * Y * d - _vn); + + dst.x = L; + dst.y = u; + dst.z = v; + } + + template + __device__ __forceinline__ void RGB2LuvConvert_b(const T& src, D& dst) + { + float3 srcf, dstf; + + srcf.x = src.x * (1.f / 255.f); + srcf.y = src.y * (1.f / 255.f); + srcf.z = src.z * (1.f / 255.f); + + RGB2LuvConvert_f(srcf, dstf); + + dst.x = saturate_cast(dstf.x * 2.55f); + dst.y = saturate_cast(dstf.y * 0.72033898305084743f + 96.525423728813564f); + dst.z = saturate_cast(dstf.z * 0.9732824427480916f + 136.259541984732824f); + } + + template struct RGB2Luv; + template + struct RGB2Luv + : unary_function::vec_type, typename TypeVec::vec_type> + { + __device__ __forceinline__ typename TypeVec::vec_type operator ()(const typename TypeVec::vec_type& src) const + { + typename TypeVec::vec_type dst; + + RGB2LuvConvert_b(src, dst); + + return dst; + } + __host__ __device__ __forceinline__ RGB2Luv() {} + __host__ __device__ __forceinline__ RGB2Luv(const RGB2Luv&) {} + }; + template + struct RGB2Luv + : unary_function::vec_type, typename TypeVec::vec_type> + { + __device__ __forceinline__ typename TypeVec::vec_type operator ()(const typename TypeVec::vec_type& src) const + { + typename TypeVec::vec_type dst; + + RGB2LuvConvert_f(src, dst); + + return dst; + } + __host__ __device__ __forceinline__ RGB2Luv() {} + __host__ __device__ __forceinline__ RGB2Luv(const RGB2Luv&) {} + }; + } + +#define OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(name, scn, dcn, srgb, blueIdx) \ + template struct name ## _traits \ + { \ + typedef ::cv::cuda::device::color_detail::RGB2Luv functor_type; \ + static __host__ __device__ __forceinline__ functor_type create_functor() \ + { \ + return functor_type(); \ + } \ + }; + + namespace color_detail + { + template + __device__ __forceinline__ void Luv2RGBConvert_f(const T& src, D& dst) + { + const float _d = 1.f / (0.950456f + 15 + 1.088754f * 3); + const float _un = 4 * 0.950456f * _d; + const float _vn = 9 * _d; + + float L = src.x; + float u = src.y; + float v = src.z; + + float Y = (L + 16.f) * (1.f / 116.f); + Y = Y * Y * Y; + + float d = (1.f / 13.f) / L; + u = u * d + _un; + v = v * d + _vn; + + float iv = 1.f / v; + float X = 2.25f * u * Y * iv; + float Z = (12 - 3 * u - 20 * v) * Y * 0.25f * iv; + + float B = 0.055648f * X - 0.204043f * Y + 1.057311f * Z; + float G = -0.969256f * X + 1.875991f * Y + 0.041556f * Z; + float R = 3.240479f * X - 1.537150f * Y - 0.498535f * Z; + + if (srgb) + { + B = splineInterpolate(B * GAMMA_TAB_SIZE, c_sRGBInvGammaTab, GAMMA_TAB_SIZE); + G = splineInterpolate(G * GAMMA_TAB_SIZE, c_sRGBInvGammaTab, GAMMA_TAB_SIZE); + R = splineInterpolate(R * GAMMA_TAB_SIZE, c_sRGBInvGammaTab, GAMMA_TAB_SIZE); + } + + dst.x = blueIdx == 0 ? B : R; + dst.y = G; + dst.z = blueIdx == 0 ? R : B; + setAlpha(dst, ColorChannel::max()); + } + + template + __device__ __forceinline__ void Luv2RGBConvert_b(const T& src, D& dst) + { + float3 srcf, dstf; + + srcf.x = src.x * (100.f / 255.f); + srcf.y = src.y * 1.388235294117647f - 134.f; + srcf.z = src.z * 1.027450980392157f - 140.f; + + Luv2RGBConvert_f(srcf, dstf); + + dst.x = saturate_cast(dstf.x * 255.f); + dst.y = saturate_cast(dstf.y * 255.f); + dst.z = saturate_cast(dstf.z * 255.f); + setAlpha(dst, ColorChannel::max()); + } + + template struct Luv2RGB; + template + struct Luv2RGB + : unary_function::vec_type, typename TypeVec::vec_type> + { + __device__ __forceinline__ typename TypeVec::vec_type operator ()(const typename TypeVec::vec_type& src) const + { + typename TypeVec::vec_type dst; + + Luv2RGBConvert_b(src, dst); + + return dst; + } + __host__ __device__ __forceinline__ Luv2RGB() {} + __host__ __device__ __forceinline__ Luv2RGB(const Luv2RGB&) {} + }; + template + struct Luv2RGB + : unary_function::vec_type, typename TypeVec::vec_type> + { + __device__ __forceinline__ typename TypeVec::vec_type operator ()(const typename TypeVec::vec_type& src) const + { + typename TypeVec::vec_type dst; + + Luv2RGBConvert_f(src, dst); + + return dst; + } + __host__ __device__ __forceinline__ Luv2RGB() {} + __host__ __device__ __forceinline__ Luv2RGB(const Luv2RGB&) {} + }; + } + +#define OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(name, scn, dcn, srgb, blueIdx) \ + template struct name ## _traits \ + { \ + typedef ::cv::cuda::device::color_detail::Luv2RGB functor_type; \ + static __host__ __device__ __forceinline__ functor_type create_functor() \ + { \ + return functor_type(); \ + } \ + }; + + #undef CV_DESCALE + +}}} // namespace cv { namespace cuda { namespace cudev + +//! @endcond + +#endif // OPENCV_CUDA_COLOR_DETAIL_HPP diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/cuda/detail/reduce.hpp b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/cuda/detail/reduce.hpp new file mode 100644 index 0000000..8af20b0 --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/cuda/detail/reduce.hpp @@ -0,0 +1,365 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef OPENCV_CUDA_REDUCE_DETAIL_HPP +#define OPENCV_CUDA_REDUCE_DETAIL_HPP + +#include +#include "../warp.hpp" +#include "../warp_shuffle.hpp" + +//! @cond IGNORED + +namespace cv { namespace cuda { namespace device +{ + namespace reduce_detail + { + template struct GetType; + template struct GetType + { + typedef T type; + }; + template struct GetType + { + typedef T type; + }; + template struct GetType + { + typedef T type; + }; + + template + struct For + { + template + static __device__ void loadToSmem(const PointerTuple& smem, const ValTuple& val, unsigned int tid) + { + thrust::get(smem)[tid] = thrust::get(val); + + For::loadToSmem(smem, val, tid); + } + template + static __device__ void loadFromSmem(const PointerTuple& smem, const ValTuple& val, unsigned int tid) + { + thrust::get(val) = thrust::get(smem)[tid]; + + For::loadFromSmem(smem, val, tid); + } + + template + static __device__ void merge(const PointerTuple& smem, const ValTuple& val, unsigned int tid, unsigned int delta, const OpTuple& op) + { + typename GetType::type>::type reg = thrust::get(smem)[tid + delta]; + thrust::get(smem)[tid] = thrust::get(val) = thrust::get(op)(thrust::get(val), reg); + + For::merge(smem, val, tid, delta, op); + } + template + static __device__ void mergeShfl(const ValTuple& val, unsigned int delta, unsigned int width, const OpTuple& op) + { + typename GetType::type>::type reg = shfl_down(thrust::get(val), delta, width); + thrust::get(val) = thrust::get(op)(thrust::get(val), reg); + + For::mergeShfl(val, delta, width, op); + } + }; + template + struct For + { + template + static __device__ void loadToSmem(const PointerTuple&, const ValTuple&, unsigned int) + { + } + template + static __device__ void loadFromSmem(const PointerTuple&, const ValTuple&, unsigned int) + { + } + + template + static __device__ void merge(const PointerTuple&, const ValTuple&, unsigned int, unsigned int, const OpTuple&) + { + } + template + static __device__ void mergeShfl(const ValTuple&, unsigned int, unsigned int, const OpTuple&) + { + } + }; + + template + __device__ __forceinline__ void loadToSmem(volatile T* smem, T& val, unsigned int tid) + { + smem[tid] = val; + } + template + __device__ __forceinline__ void loadFromSmem(volatile T* smem, T& val, unsigned int tid) + { + val = smem[tid]; + } + template + __device__ __forceinline__ void loadToSmem(const thrust::tuple& smem, + const thrust::tuple& val, + unsigned int tid) + { + For<0, thrust::tuple_size >::value>::loadToSmem(smem, val, tid); + } + template + __device__ __forceinline__ void loadFromSmem(const thrust::tuple& smem, + const thrust::tuple& val, + unsigned int tid) + { + For<0, thrust::tuple_size >::value>::loadFromSmem(smem, val, tid); + } + + template + __device__ __forceinline__ void merge(volatile T* smem, T& val, unsigned int tid, unsigned int delta, const Op& op) + { + T reg = smem[tid + delta]; + smem[tid] = val = op(val, reg); + } + template + __device__ __forceinline__ void mergeShfl(T& val, unsigned int delta, unsigned int width, const Op& op) + { + T reg = shfl_down(val, delta, width); + val = op(val, reg); + } + template + __device__ __forceinline__ void merge(const thrust::tuple& smem, + const thrust::tuple& val, + unsigned int tid, + unsigned int delta, + const thrust::tuple& op) + { + For<0, thrust::tuple_size >::value>::merge(smem, val, tid, delta, op); + } + template + __device__ __forceinline__ void mergeShfl(const thrust::tuple& val, + unsigned int delta, + unsigned int width, + const thrust::tuple& op) + { + For<0, thrust::tuple_size >::value>::mergeShfl(val, delta, width, op); + } + + template struct Generic + { + template + static __device__ void reduce(Pointer smem, Reference val, unsigned int tid, Op op) + { + loadToSmem(smem, val, tid); + if (N >= 32) + __syncthreads(); + + if (N >= 2048) + { + if (tid < 1024) + merge(smem, val, tid, 1024, op); + + __syncthreads(); + } + if (N >= 1024) + { + if (tid < 512) + merge(smem, val, tid, 512, op); + + __syncthreads(); + } + if (N >= 512) + { + if (tid < 256) + merge(smem, val, tid, 256, op); + + __syncthreads(); + } + if (N >= 256) + { + if (tid < 128) + merge(smem, val, tid, 128, op); + + __syncthreads(); + } + if (N >= 128) + { + if (tid < 64) + merge(smem, val, tid, 64, op); + + __syncthreads(); + } + if (N >= 64) + { + if (tid < 32) + merge(smem, val, tid, 32, op); + } + + if (tid < 16) + { + merge(smem, val, tid, 16, op); + merge(smem, val, tid, 8, op); + merge(smem, val, tid, 4, op); + merge(smem, val, tid, 2, op); + merge(smem, val, tid, 1, op); + } + } + }; + + template + struct Unroll + { + static __device__ void loopShfl(Reference val, Op op, unsigned int N) + { + mergeShfl(val, I, N, op); + Unroll::loopShfl(val, op, N); + } + static __device__ void loop(Pointer smem, Reference val, unsigned int tid, Op op) + { + merge(smem, val, tid, I, op); + Unroll::loop(smem, val, tid, op); + } + }; + template + struct Unroll<0, Pointer, Reference, Op> + { + static __device__ void loopShfl(Reference, Op, unsigned int) + { + } + static __device__ void loop(Pointer, Reference, unsigned int, Op) + { + } + }; + + template struct WarpOptimized + { + template + static __device__ void reduce(Pointer smem, Reference val, unsigned int tid, Op op) + { + #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300 + CV_UNUSED(smem); + CV_UNUSED(tid); + + Unroll::loopShfl(val, op, N); + #else + loadToSmem(smem, val, tid); + + if (tid < N / 2) + Unroll::loop(smem, val, tid, op); + #endif + } + }; + + template struct GenericOptimized32 + { + enum { M = N / 32 }; + + template + static __device__ void reduce(Pointer smem, Reference val, unsigned int tid, Op op) + { + const unsigned int laneId = Warp::laneId(); + + #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300 + Unroll<16, Pointer, Reference, Op>::loopShfl(val, op, warpSize); + + if (laneId == 0) + loadToSmem(smem, val, tid / 32); + #else + loadToSmem(smem, val, tid); + + if (laneId < 16) + Unroll<16, Pointer, Reference, Op>::loop(smem, val, tid, op); + + __syncthreads(); + + if (laneId == 0) + loadToSmem(smem, val, tid / 32); + #endif + + __syncthreads(); + + loadFromSmem(smem, val, tid); + + if (tid < 32) + { + #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300 + Unroll::loopShfl(val, op, M); + #else + Unroll::loop(smem, val, tid, op); + #endif + } + } + }; + + template struct StaticIf; + template struct StaticIf + { + typedef T1 type; + }; + template struct StaticIf + { + typedef T2 type; + }; + + template struct IsPowerOf2 + { + enum { value = ((N != 0) && !(N & (N - 1))) }; + }; + + template struct Dispatcher + { + typedef typename StaticIf< + (N <= 32) && IsPowerOf2::value, + WarpOptimized, + typename StaticIf< + (N <= 1024) && IsPowerOf2::value, + GenericOptimized32, + Generic + >::type + >::type reductor; + }; + } +}}} + +//! @endcond + +#endif // OPENCV_CUDA_REDUCE_DETAIL_HPP diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/cuda/detail/reduce_key_val.hpp b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/cuda/detail/reduce_key_val.hpp new file mode 100644 index 0000000..df37c17 --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/cuda/detail/reduce_key_val.hpp @@ -0,0 +1,502 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef OPENCV_CUDA_PRED_VAL_REDUCE_DETAIL_HPP +#define OPENCV_CUDA_PRED_VAL_REDUCE_DETAIL_HPP + +#include +#include "../warp.hpp" +#include "../warp_shuffle.hpp" + +//! @cond IGNORED + +namespace cv { namespace cuda { namespace device +{ + namespace reduce_key_val_detail + { + template struct GetType; + template struct GetType + { + typedef T type; + }; + template struct GetType + { + typedef T type; + }; + template struct GetType + { + typedef T type; + }; + + template + struct For + { + template + static __device__ void loadToSmem(const PointerTuple& smem, const ReferenceTuple& data, unsigned int tid) + { + thrust::get(smem)[tid] = thrust::get(data); + + For::loadToSmem(smem, data, tid); + } + template + static __device__ void loadFromSmem(const PointerTuple& smem, const ReferenceTuple& data, unsigned int tid) + { + thrust::get(data) = thrust::get(smem)[tid]; + + For::loadFromSmem(smem, data, tid); + } + + template + static __device__ void copyShfl(const ReferenceTuple& val, unsigned int delta, int width) + { + thrust::get(val) = shfl_down(thrust::get(val), delta, width); + + For::copyShfl(val, delta, width); + } + template + static __device__ void copy(const PointerTuple& svals, const ReferenceTuple& val, unsigned int tid, unsigned int delta) + { + thrust::get(svals)[tid] = thrust::get(val) = thrust::get(svals)[tid + delta]; + + For::copy(svals, val, tid, delta); + } + + template + static __device__ void mergeShfl(const KeyReferenceTuple& key, const ValReferenceTuple& val, const CmpTuple& cmp, unsigned int delta, int width) + { + typename GetType::type>::type reg = shfl_down(thrust::get(key), delta, width); + + if (thrust::get(cmp)(reg, thrust::get(key))) + { + thrust::get(key) = reg; + thrust::get(val) = shfl_down(thrust::get(val), delta, width); + } + + For::mergeShfl(key, val, cmp, delta, width); + } + template + static __device__ void merge(const KeyPointerTuple& skeys, const KeyReferenceTuple& key, + const ValPointerTuple& svals, const ValReferenceTuple& val, + const CmpTuple& cmp, + unsigned int tid, unsigned int delta) + { + typename GetType::type>::type reg = thrust::get(skeys)[tid + delta]; + + if (thrust::get(cmp)(reg, thrust::get(key))) + { + thrust::get(skeys)[tid] = thrust::get(key) = reg; + thrust::get(svals)[tid] = thrust::get(val) = thrust::get(svals)[tid + delta]; + } + + For::merge(skeys, key, svals, val, cmp, tid, delta); + } + }; + template + struct For + { + template + static __device__ void loadToSmem(const PointerTuple&, const ReferenceTuple&, unsigned int) + { + } + template + static __device__ void loadFromSmem(const PointerTuple&, const ReferenceTuple&, unsigned int) + { + } + + template + static __device__ void copyShfl(const ReferenceTuple&, unsigned int, int) + { + } + template + static __device__ void copy(const PointerTuple&, const ReferenceTuple&, unsigned int, unsigned int) + { + } + + template + static __device__ void mergeShfl(const KeyReferenceTuple&, const ValReferenceTuple&, const CmpTuple&, unsigned int, int) + { + } + template + static __device__ void merge(const KeyPointerTuple&, const KeyReferenceTuple&, + const ValPointerTuple&, const ValReferenceTuple&, + const CmpTuple&, + unsigned int, unsigned int) + { + } + }; + + ////////////////////////////////////////////////////// + // loadToSmem + + template + __device__ __forceinline__ void loadToSmem(volatile T* smem, T& data, unsigned int tid) + { + smem[tid] = data; + } + template + __device__ __forceinline__ void loadFromSmem(volatile T* smem, T& data, unsigned int tid) + { + data = smem[tid]; + } + template + __device__ __forceinline__ void loadToSmem(const thrust::tuple& smem, + const thrust::tuple& data, + unsigned int tid) + { + For<0, thrust::tuple_size >::value>::loadToSmem(smem, data, tid); + } + template + __device__ __forceinline__ void loadFromSmem(const thrust::tuple& smem, + const thrust::tuple& data, + unsigned int tid) + { + For<0, thrust::tuple_size >::value>::loadFromSmem(smem, data, tid); + } + + ////////////////////////////////////////////////////// + // copyVals + + template + __device__ __forceinline__ void copyValsShfl(V& val, unsigned int delta, int width) + { + val = shfl_down(val, delta, width); + } + template + __device__ __forceinline__ void copyVals(volatile V* svals, V& val, unsigned int tid, unsigned int delta) + { + svals[tid] = val = svals[tid + delta]; + } + template + __device__ __forceinline__ void copyValsShfl(const thrust::tuple& val, + unsigned int delta, + int width) + { + For<0, thrust::tuple_size >::value>::copyShfl(val, delta, width); + } + template + __device__ __forceinline__ void copyVals(const thrust::tuple& svals, + const thrust::tuple& val, + unsigned int tid, unsigned int delta) + { + For<0, thrust::tuple_size >::value>::copy(svals, val, tid, delta); + } + + ////////////////////////////////////////////////////// + // merge + + template + __device__ __forceinline__ void mergeShfl(K& key, V& val, const Cmp& cmp, unsigned int delta, int width) + { + K reg = shfl_down(key, delta, width); + + if (cmp(reg, key)) + { + key = reg; + copyValsShfl(val, delta, width); + } + } + template + __device__ __forceinline__ void merge(volatile K* skeys, K& key, volatile V* svals, V& val, const Cmp& cmp, unsigned int tid, unsigned int delta) + { + K reg = skeys[tid + delta]; + + if (cmp(reg, key)) + { + skeys[tid] = key = reg; + copyVals(svals, val, tid, delta); + } + } + template + __device__ __forceinline__ void mergeShfl(K& key, + const thrust::tuple& val, + const Cmp& cmp, + unsigned int delta, int width) + { + K reg = shfl_down(key, delta, width); + + if (cmp(reg, key)) + { + key = reg; + copyValsShfl(val, delta, width); + } + } + template + __device__ __forceinline__ void merge(volatile K* skeys, K& key, + const thrust::tuple& svals, + const thrust::tuple& val, + const Cmp& cmp, unsigned int tid, unsigned int delta) + { + K reg = skeys[tid + delta]; + + if (cmp(reg, key)) + { + skeys[tid] = key = reg; + copyVals(svals, val, tid, delta); + } + } + template + __device__ __forceinline__ void mergeShfl(const thrust::tuple& key, + const thrust::tuple& val, + const thrust::tuple& cmp, + unsigned int delta, int width) + { + For<0, thrust::tuple_size >::value>::mergeShfl(key, val, cmp, delta, width); + } + template + __device__ __forceinline__ void merge(const thrust::tuple& skeys, + const thrust::tuple& key, + const thrust::tuple& svals, + const thrust::tuple& val, + const thrust::tuple& cmp, + unsigned int tid, unsigned int delta) + { + For<0, thrust::tuple_size >::value>::merge(skeys, key, svals, val, cmp, tid, delta); + } + + ////////////////////////////////////////////////////// + // Generic + + template struct Generic + { + template + static __device__ void reduce(KP skeys, KR key, VP svals, VR val, unsigned int tid, Cmp cmp) + { + loadToSmem(skeys, key, tid); + loadValsToSmem(svals, val, tid); + if (N >= 32) + __syncthreads(); + + if (N >= 2048) + { + if (tid < 1024) + merge(skeys, key, svals, val, cmp, tid, 1024); + + __syncthreads(); + } + if (N >= 1024) + { + if (tid < 512) + merge(skeys, key, svals, val, cmp, tid, 512); + + __syncthreads(); + } + if (N >= 512) + { + if (tid < 256) + merge(skeys, key, svals, val, cmp, tid, 256); + + __syncthreads(); + } + if (N >= 256) + { + if (tid < 128) + merge(skeys, key, svals, val, cmp, tid, 128); + + __syncthreads(); + } + if (N >= 128) + { + if (tid < 64) + merge(skeys, key, svals, val, cmp, tid, 64); + + __syncthreads(); + } + if (N >= 64) + { + if (tid < 32) + merge(skeys, key, svals, val, cmp, tid, 32); + } + + if (tid < 16) + { + merge(skeys, key, svals, val, cmp, tid, 16); + merge(skeys, key, svals, val, cmp, tid, 8); + merge(skeys, key, svals, val, cmp, tid, 4); + merge(skeys, key, svals, val, cmp, tid, 2); + merge(skeys, key, svals, val, cmp, tid, 1); + } + } + }; + + template + struct Unroll + { + static __device__ void loopShfl(KR key, VR val, Cmp cmp, unsigned int N) + { + mergeShfl(key, val, cmp, I, N); + Unroll::loopShfl(key, val, cmp, N); + } + static __device__ void loop(KP skeys, KR key, VP svals, VR val, unsigned int tid, Cmp cmp) + { + merge(skeys, key, svals, val, cmp, tid, I); + Unroll::loop(skeys, key, svals, val, tid, cmp); + } + }; + template + struct Unroll<0, KP, KR, VP, VR, Cmp> + { + static __device__ void loopShfl(KR, VR, Cmp, unsigned int) + { + } + static __device__ void loop(KP, KR, VP, VR, unsigned int, Cmp) + { + } + }; + + template struct WarpOptimized + { + template + static __device__ void reduce(KP skeys, KR key, VP svals, VR val, unsigned int tid, Cmp cmp) + { + #if 0 // __CUDA_ARCH__ >= 300 + CV_UNUSED(skeys); + CV_UNUSED(svals); + CV_UNUSED(tid); + + Unroll::loopShfl(key, val, cmp, N); + #else + loadToSmem(skeys, key, tid); + loadToSmem(svals, val, tid); + + if (tid < N / 2) + Unroll::loop(skeys, key, svals, val, tid, cmp); + #endif + } + }; + + template struct GenericOptimized32 + { + enum { M = N / 32 }; + + template + static __device__ void reduce(KP skeys, KR key, VP svals, VR val, unsigned int tid, Cmp cmp) + { + const unsigned int laneId = Warp::laneId(); + + #if 0 // __CUDA_ARCH__ >= 300 + Unroll<16, KP, KR, VP, VR, Cmp>::loopShfl(key, val, cmp, warpSize); + + if (laneId == 0) + { + loadToSmem(skeys, key, tid / 32); + loadToSmem(svals, val, tid / 32); + } + #else + loadToSmem(skeys, key, tid); + loadToSmem(svals, val, tid); + + if (laneId < 16) + Unroll<16, KP, KR, VP, VR, Cmp>::loop(skeys, key, svals, val, tid, cmp); + + __syncthreads(); + + if (laneId == 0) + { + loadToSmem(skeys, key, tid / 32); + loadToSmem(svals, val, tid / 32); + } + #endif + + __syncthreads(); + + loadFromSmem(skeys, key, tid); + + if (tid < 32) + { + #if 0 // __CUDA_ARCH__ >= 300 + loadFromSmem(svals, val, tid); + + Unroll::loopShfl(key, val, cmp, M); + #else + Unroll::loop(skeys, key, svals, val, tid, cmp); + #endif + } + } + }; + + template struct StaticIf; + template struct StaticIf + { + typedef T1 type; + }; + template struct StaticIf + { + typedef T2 type; + }; + + template struct IsPowerOf2 + { + enum { value = ((N != 0) && !(N & (N - 1))) }; + }; + + template struct Dispatcher + { + typedef typename StaticIf< + (N <= 32) && IsPowerOf2::value, + WarpOptimized, + typename StaticIf< + (N <= 1024) && IsPowerOf2::value, + GenericOptimized32, + Generic + >::type + >::type reductor; + }; + } +}}} + +//! @endcond + +#endif // OPENCV_CUDA_PRED_VAL_REDUCE_DETAIL_HPP diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/cuda/detail/transform_detail.hpp b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/cuda/detail/transform_detail.hpp new file mode 100644 index 0000000..1919848 --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/cuda/detail/transform_detail.hpp @@ -0,0 +1,392 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef OPENCV_CUDA_TRANSFORM_DETAIL_HPP +#define OPENCV_CUDA_TRANSFORM_DETAIL_HPP + +#include "../common.hpp" +#include "../vec_traits.hpp" +#include "../functional.hpp" + +//! @cond IGNORED + +namespace cv { namespace cuda { namespace device +{ + namespace transform_detail + { + //! Read Write Traits + + template struct UnaryReadWriteTraits + { + typedef typename TypeVec::vec_type read_type; + typedef typename TypeVec::vec_type write_type; + }; + + template struct BinaryReadWriteTraits + { + typedef typename TypeVec::vec_type read_type1; + typedef typename TypeVec::vec_type read_type2; + typedef typename TypeVec::vec_type write_type; + }; + + //! Transform kernels + + template struct OpUnroller; + template <> struct OpUnroller<1> + { + template + static __device__ __forceinline__ void unroll(const T& src, D& dst, const Mask& mask, UnOp& op, int x_shifted, int y) + { + if (mask(y, x_shifted)) + dst.x = op(src.x); + } + + template + static __device__ __forceinline__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, BinOp& op, int x_shifted, int y) + { + if (mask(y, x_shifted)) + dst.x = op(src1.x, src2.x); + } + }; + template <> struct OpUnroller<2> + { + template + static __device__ __forceinline__ void unroll(const T& src, D& dst, const Mask& mask, UnOp& op, int x_shifted, int y) + { + if (mask(y, x_shifted)) + dst.x = op(src.x); + if (mask(y, x_shifted + 1)) + dst.y = op(src.y); + } + + template + static __device__ __forceinline__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, BinOp& op, int x_shifted, int y) + { + if (mask(y, x_shifted)) + dst.x = op(src1.x, src2.x); + if (mask(y, x_shifted + 1)) + dst.y = op(src1.y, src2.y); + } + }; + template <> struct OpUnroller<3> + { + template + static __device__ __forceinline__ void unroll(const T& src, D& dst, const Mask& mask, const UnOp& op, int x_shifted, int y) + { + if (mask(y, x_shifted)) + dst.x = op(src.x); + if (mask(y, x_shifted + 1)) + dst.y = op(src.y); + if (mask(y, x_shifted + 2)) + dst.z = op(src.z); + } + + template + static __device__ __forceinline__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, const BinOp& op, int x_shifted, int y) + { + if (mask(y, x_shifted)) + dst.x = op(src1.x, src2.x); + if (mask(y, x_shifted + 1)) + dst.y = op(src1.y, src2.y); + if (mask(y, x_shifted + 2)) + dst.z = op(src1.z, src2.z); + } + }; + template <> struct OpUnroller<4> + { + template + static __device__ __forceinline__ void unroll(const T& src, D& dst, const Mask& mask, const UnOp& op, int x_shifted, int y) + { + if (mask(y, x_shifted)) + dst.x = op(src.x); + if (mask(y, x_shifted + 1)) + dst.y = op(src.y); + if (mask(y, x_shifted + 2)) + dst.z = op(src.z); + if (mask(y, x_shifted + 3)) + dst.w = op(src.w); + } + + template + static __device__ __forceinline__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, const BinOp& op, int x_shifted, int y) + { + if (mask(y, x_shifted)) + dst.x = op(src1.x, src2.x); + if (mask(y, x_shifted + 1)) + dst.y = op(src1.y, src2.y); + if (mask(y, x_shifted + 2)) + dst.z = op(src1.z, src2.z); + if (mask(y, x_shifted + 3)) + dst.w = op(src1.w, src2.w); + } + }; + template <> struct OpUnroller<8> + { + template + static __device__ __forceinline__ void unroll(const T& src, D& dst, const Mask& mask, const UnOp& op, int x_shifted, int y) + { + if (mask(y, x_shifted)) + dst.a0 = op(src.a0); + if (mask(y, x_shifted + 1)) + dst.a1 = op(src.a1); + if (mask(y, x_shifted + 2)) + dst.a2 = op(src.a2); + if (mask(y, x_shifted + 3)) + dst.a3 = op(src.a3); + if (mask(y, x_shifted + 4)) + dst.a4 = op(src.a4); + if (mask(y, x_shifted + 5)) + dst.a5 = op(src.a5); + if (mask(y, x_shifted + 6)) + dst.a6 = op(src.a6); + if (mask(y, x_shifted + 7)) + dst.a7 = op(src.a7); + } + + template + static __device__ __forceinline__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, const BinOp& op, int x_shifted, int y) + { + if (mask(y, x_shifted)) + dst.a0 = op(src1.a0, src2.a0); + if (mask(y, x_shifted + 1)) + dst.a1 = op(src1.a1, src2.a1); + if (mask(y, x_shifted + 2)) + dst.a2 = op(src1.a2, src2.a2); + if (mask(y, x_shifted + 3)) + dst.a3 = op(src1.a3, src2.a3); + if (mask(y, x_shifted + 4)) + dst.a4 = op(src1.a4, src2.a4); + if (mask(y, x_shifted + 5)) + dst.a5 = op(src1.a5, src2.a5); + if (mask(y, x_shifted + 6)) + dst.a6 = op(src1.a6, src2.a6); + if (mask(y, x_shifted + 7)) + dst.a7 = op(src1.a7, src2.a7); + } + }; + + template + static __global__ void transformSmart(const PtrStepSz src_, PtrStep dst_, const Mask mask, const UnOp op) + { + typedef TransformFunctorTraits ft; + typedef typename UnaryReadWriteTraits::read_type read_type; + typedef typename UnaryReadWriteTraits::write_type write_type; + + const int x = threadIdx.x + blockIdx.x * blockDim.x; + const int y = threadIdx.y + blockIdx.y * blockDim.y; + const int x_shifted = x * ft::smart_shift; + + if (y < src_.rows) + { + const T* src = src_.ptr(y); + D* dst = dst_.ptr(y); + + if (x_shifted + ft::smart_shift - 1 < src_.cols) + { + const read_type src_n_el = ((const read_type*)src)[x]; + OpUnroller::unroll(src_n_el, ((write_type*)dst)[x], mask, op, x_shifted, y); + } + else + { + for (int real_x = x_shifted; real_x < src_.cols; ++real_x) + { + if (mask(y, real_x)) + dst[real_x] = op(src[real_x]); + } + } + } + } + + template + __global__ static void transformSimple(const PtrStepSz src, PtrStep dst, const Mask mask, const UnOp op) + { + const int x = blockDim.x * blockIdx.x + threadIdx.x; + const int y = blockDim.y * blockIdx.y + threadIdx.y; + + if (x < src.cols && y < src.rows && mask(y, x)) + { + dst.ptr(y)[x] = op(src.ptr(y)[x]); + } + } + + template + static __global__ void transformSmart(const PtrStepSz src1_, const PtrStep src2_, PtrStep dst_, + const Mask mask, const BinOp op) + { + typedef TransformFunctorTraits ft; + typedef typename BinaryReadWriteTraits::read_type1 read_type1; + typedef typename BinaryReadWriteTraits::read_type2 read_type2; + typedef typename BinaryReadWriteTraits::write_type write_type; + + const int x = threadIdx.x + blockIdx.x * blockDim.x; + const int y = threadIdx.y + blockIdx.y * blockDim.y; + const int x_shifted = x * ft::smart_shift; + + if (y < src1_.rows) + { + const T1* src1 = src1_.ptr(y); + const T2* src2 = src2_.ptr(y); + D* dst = dst_.ptr(y); + + if (x_shifted + ft::smart_shift - 1 < src1_.cols) + { + const read_type1 src1_n_el = ((const read_type1*)src1)[x]; + const read_type2 src2_n_el = ((const read_type2*)src2)[x]; + + OpUnroller::unroll(src1_n_el, src2_n_el, ((write_type*)dst)[x], mask, op, x_shifted, y); + } + else + { + for (int real_x = x_shifted; real_x < src1_.cols; ++real_x) + { + if (mask(y, real_x)) + dst[real_x] = op(src1[real_x], src2[real_x]); + } + } + } + } + + template + static __global__ void transformSimple(const PtrStepSz src1, const PtrStep src2, PtrStep dst, + const Mask mask, const BinOp op) + { + const int x = blockDim.x * blockIdx.x + threadIdx.x; + const int y = blockDim.y * blockIdx.y + threadIdx.y; + + if (x < src1.cols && y < src1.rows && mask(y, x)) + { + const T1 src1_data = src1.ptr(y)[x]; + const T2 src2_data = src2.ptr(y)[x]; + dst.ptr(y)[x] = op(src1_data, src2_data); + } + } + + template struct TransformDispatcher; + template<> struct TransformDispatcher + { + template + static void call(PtrStepSz src, PtrStepSz dst, UnOp op, Mask mask, cudaStream_t stream) + { + typedef TransformFunctorTraits ft; + + const dim3 threads(ft::simple_block_dim_x, ft::simple_block_dim_y, 1); + const dim3 grid(divUp(src.cols, threads.x), divUp(src.rows, threads.y), 1); + + transformSimple<<>>(src, dst, mask, op); + cudaSafeCall( cudaGetLastError() ); + + if (stream == 0) + cudaSafeCall( cudaDeviceSynchronize() ); + } + + template + static void call(PtrStepSz src1, PtrStepSz src2, PtrStepSz dst, BinOp op, Mask mask, cudaStream_t stream) + { + typedef TransformFunctorTraits ft; + + const dim3 threads(ft::simple_block_dim_x, ft::simple_block_dim_y, 1); + const dim3 grid(divUp(src1.cols, threads.x), divUp(src1.rows, threads.y), 1); + + transformSimple<<>>(src1, src2, dst, mask, op); + cudaSafeCall( cudaGetLastError() ); + + if (stream == 0) + cudaSafeCall( cudaDeviceSynchronize() ); + } + }; + template<> struct TransformDispatcher + { + template + static void call(PtrStepSz src, PtrStepSz dst, UnOp op, Mask mask, cudaStream_t stream) + { + typedef TransformFunctorTraits ft; + + CV_StaticAssert(ft::smart_shift != 1, ""); + + if (!isAligned(src.data, ft::smart_shift * sizeof(T)) || !isAligned(src.step, ft::smart_shift * sizeof(T)) || + !isAligned(dst.data, ft::smart_shift * sizeof(D)) || !isAligned(dst.step, ft::smart_shift * sizeof(D))) + { + TransformDispatcher::call(src, dst, op, mask, stream); + return; + } + + const dim3 threads(ft::smart_block_dim_x, ft::smart_block_dim_y, 1); + const dim3 grid(divUp(src.cols, threads.x * ft::smart_shift), divUp(src.rows, threads.y), 1); + + transformSmart<<>>(src, dst, mask, op); + cudaSafeCall( cudaGetLastError() ); + + if (stream == 0) + cudaSafeCall( cudaDeviceSynchronize() ); + } + + template + static void call(PtrStepSz src1, PtrStepSz src2, PtrStepSz dst, BinOp op, Mask mask, cudaStream_t stream) + { + typedef TransformFunctorTraits ft; + + CV_StaticAssert(ft::smart_shift != 1, ""); + + if (!isAligned(src1.data, ft::smart_shift * sizeof(T1)) || !isAligned(src1.step, ft::smart_shift * sizeof(T1)) || + !isAligned(src2.data, ft::smart_shift * sizeof(T2)) || !isAligned(src2.step, ft::smart_shift * sizeof(T2)) || + !isAligned(dst.data, ft::smart_shift * sizeof(D)) || !isAligned(dst.step, ft::smart_shift * sizeof(D))) + { + TransformDispatcher::call(src1, src2, dst, op, mask, stream); + return; + } + + const dim3 threads(ft::smart_block_dim_x, ft::smart_block_dim_y, 1); + const dim3 grid(divUp(src1.cols, threads.x * ft::smart_shift), divUp(src1.rows, threads.y), 1); + + transformSmart<<>>(src1, src2, dst, mask, op); + cudaSafeCall( cudaGetLastError() ); + + if (stream == 0) + cudaSafeCall( cudaDeviceSynchronize() ); + } + }; + } // namespace transform_detail +}}} // namespace cv { namespace cuda { namespace cudev + +//! @endcond + +#endif // OPENCV_CUDA_TRANSFORM_DETAIL_HPP diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/cuda/detail/type_traits_detail.hpp b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/cuda/detail/type_traits_detail.hpp new file mode 100644 index 0000000..a78bd2c --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/cuda/detail/type_traits_detail.hpp @@ -0,0 +1,191 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef OPENCV_CUDA_TYPE_TRAITS_DETAIL_HPP +#define OPENCV_CUDA_TYPE_TRAITS_DETAIL_HPP + +#include "../common.hpp" +#include "../vec_traits.hpp" + +//! @cond IGNORED + +namespace cv { namespace cuda { namespace device +{ + namespace type_traits_detail + { + template struct Select { typedef T1 type; }; + template struct Select { typedef T2 type; }; + + template struct IsSignedIntergral { enum {value = 0}; }; + template <> struct IsSignedIntergral { enum {value = 1}; }; + template <> struct IsSignedIntergral { enum {value = 1}; }; + template <> struct IsSignedIntergral { enum {value = 1}; }; + template <> struct IsSignedIntergral { enum {value = 1}; }; + template <> struct IsSignedIntergral { enum {value = 1}; }; + template <> struct IsSignedIntergral { enum {value = 1}; }; + + template struct IsUnsignedIntegral { enum {value = 0}; }; + template <> struct IsUnsignedIntegral { enum {value = 1}; }; + template <> struct IsUnsignedIntegral { enum {value = 1}; }; + template <> struct IsUnsignedIntegral { enum {value = 1}; }; + template <> struct IsUnsignedIntegral { enum {value = 1}; }; + template <> struct IsUnsignedIntegral { enum {value = 1}; }; + template <> struct IsUnsignedIntegral { enum {value = 1}; }; + + template struct IsIntegral { enum {value = IsSignedIntergral::value || IsUnsignedIntegral::value}; }; + template <> struct IsIntegral { enum {value = 1}; }; + template <> struct IsIntegral { enum {value = 1}; }; + + template struct IsFloat { enum {value = 0}; }; + template <> struct IsFloat { enum {value = 1}; }; + template <> struct IsFloat { enum {value = 1}; }; + + template struct IsVec { enum {value = 0}; }; + template <> struct IsVec { enum {value = 1}; }; + template <> struct IsVec { enum {value = 1}; }; + template <> struct IsVec { enum {value = 1}; }; + template <> struct IsVec { enum {value = 1}; }; + template <> struct IsVec { enum {value = 1}; }; + template <> struct IsVec { enum {value = 1}; }; + template <> struct IsVec { enum {value = 1}; }; + template <> struct IsVec { enum {value = 1}; }; + template <> struct IsVec { enum {value = 1}; }; + template <> struct IsVec { enum {value = 1}; }; + template <> struct IsVec { enum {value = 1}; }; + template <> struct IsVec { enum {value = 1}; }; + template <> struct IsVec { enum {value = 1}; }; + template <> struct IsVec { enum {value = 1}; }; + template <> struct IsVec { enum {value = 1}; }; + template <> struct IsVec { enum {value = 1}; }; + template <> struct IsVec { enum {value = 1}; }; + template <> struct IsVec { enum {value = 1}; }; + template <> struct IsVec { enum {value = 1}; }; + template <> struct IsVec { enum {value = 1}; }; + template <> struct IsVec { enum {value = 1}; }; + template <> struct IsVec { enum {value = 1}; }; + template <> struct IsVec { enum {value = 1}; }; + template <> struct IsVec { enum {value = 1}; }; + template <> struct IsVec { enum {value = 1}; }; + template <> struct IsVec { enum {value = 1}; }; + template <> struct IsVec { enum {value = 1}; }; + template <> struct IsVec { enum {value = 1}; }; + template <> struct IsVec { enum {value = 1}; }; + template <> struct IsVec { enum {value = 1}; }; + template <> struct IsVec { enum {value = 1}; }; + template <> struct IsVec { enum {value = 1}; }; + template <> struct IsVec { enum {value = 1}; }; + template <> struct IsVec { enum {value = 1}; }; + template <> struct IsVec { enum {value = 1}; }; + template <> struct IsVec { enum {value = 1}; }; + template <> struct IsVec { enum {value = 1}; }; + template <> struct IsVec { enum {value = 1}; }; + template <> struct IsVec { enum {value = 1}; }; + template <> struct IsVec { enum {value = 1}; }; + + template struct AddParameterType { typedef const U& type; }; + template struct AddParameterType { typedef U& type; }; + template <> struct AddParameterType { typedef void type; }; + + template struct ReferenceTraits + { + enum { value = false }; + typedef U type; + }; + template struct ReferenceTraits + { + enum { value = true }; + typedef U type; + }; + + template struct PointerTraits + { + enum { value = false }; + typedef void type; + }; + template struct PointerTraits + { + enum { value = true }; + typedef U type; + }; + template struct PointerTraits + { + enum { value = true }; + typedef U type; + }; + + template struct UnConst + { + typedef U type; + enum { value = 0 }; + }; + template struct UnConst + { + typedef U type; + enum { value = 1 }; + }; + template struct UnConst + { + typedef U& type; + enum { value = 1 }; + }; + + template struct UnVolatile + { + typedef U type; + enum { value = 0 }; + }; + template struct UnVolatile + { + typedef U type; + enum { value = 1 }; + }; + template struct UnVolatile + { + typedef U& type; + enum { value = 1 }; + }; + } // namespace type_traits_detail +}}} // namespace cv { namespace cuda { namespace cudev + +//! @endcond + +#endif // OPENCV_CUDA_TYPE_TRAITS_DETAIL_HPP diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/cuda/detail/vec_distance_detail.hpp b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/cuda/detail/vec_distance_detail.hpp new file mode 100644 index 0000000..8283a99 --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/cuda/detail/vec_distance_detail.hpp @@ -0,0 +1,121 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef OPENCV_CUDA_VEC_DISTANCE_DETAIL_HPP +#define OPENCV_CUDA_VEC_DISTANCE_DETAIL_HPP + +#include "../datamov_utils.hpp" + +//! @cond IGNORED + +namespace cv { namespace cuda { namespace device +{ + namespace vec_distance_detail + { + template struct UnrollVecDiffCached + { + template + static __device__ void calcCheck(const T1* vecCached, const T2* vecGlob, int len, Dist& dist, int ind) + { + if (ind < len) + { + T1 val1 = *vecCached++; + + T2 val2; + ForceGlob::Load(vecGlob, ind, val2); + + dist.reduceIter(val1, val2); + + UnrollVecDiffCached::calcCheck(vecCached, vecGlob, len, dist, ind + THREAD_DIM); + } + } + + template + static __device__ void calcWithoutCheck(const T1* vecCached, const T2* vecGlob, Dist& dist) + { + T1 val1 = *vecCached++; + + T2 val2; + ForceGlob::Load(vecGlob, 0, val2); + vecGlob += THREAD_DIM; + + dist.reduceIter(val1, val2); + + UnrollVecDiffCached::calcWithoutCheck(vecCached, vecGlob, dist); + } + }; + template struct UnrollVecDiffCached + { + template + static __device__ __forceinline__ void calcCheck(const T1*, const T2*, int, Dist&, int) + { + } + + template + static __device__ __forceinline__ void calcWithoutCheck(const T1*, const T2*, Dist&) + { + } + }; + + template struct VecDiffCachedCalculator; + template struct VecDiffCachedCalculator + { + template + static __device__ __forceinline__ void calc(const T1* vecCached, const T2* vecGlob, int len, Dist& dist, int tid) + { + UnrollVecDiffCached::calcCheck(vecCached, vecGlob, len, dist, tid); + } + }; + template struct VecDiffCachedCalculator + { + template + static __device__ __forceinline__ void calc(const T1* vecCached, const T2* vecGlob, int len, Dist& dist, int tid) + { + UnrollVecDiffCached::calcWithoutCheck(vecCached, vecGlob + tid, dist); + } + }; + } // namespace vec_distance_detail +}}} // namespace cv { namespace cuda { namespace cudev + +//! @endcond + +#endif // OPENCV_CUDA_VEC_DISTANCE_DETAIL_HPP diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/cuda/dynamic_smem.hpp b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/cuda/dynamic_smem.hpp new file mode 100644 index 0000000..42570c6 --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/cuda/dynamic_smem.hpp @@ -0,0 +1,88 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef OPENCV_CUDA_DYNAMIC_SMEM_HPP +#define OPENCV_CUDA_DYNAMIC_SMEM_HPP + +/** @file + * @deprecated Use @ref cudev instead. + */ + +//! @cond IGNORED + +namespace cv { namespace cuda { namespace device +{ + template struct DynamicSharedMem + { + __device__ __forceinline__ operator T*() + { + extern __shared__ int __smem[]; + return (T*)__smem; + } + + __device__ __forceinline__ operator const T*() const + { + extern __shared__ int __smem[]; + return (T*)__smem; + } + }; + + // specialize for double to avoid unaligned memory access compile errors + template<> struct DynamicSharedMem + { + __device__ __forceinline__ operator double*() + { + extern __shared__ double __smem_d[]; + return (double*)__smem_d; + } + + __device__ __forceinline__ operator const double*() const + { + extern __shared__ double __smem_d[]; + return (double*)__smem_d; + } + }; +}}} + +//! @endcond + +#endif // OPENCV_CUDA_DYNAMIC_SMEM_HPP diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/cuda/emulation.hpp b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/cuda/emulation.hpp new file mode 100644 index 0000000..17dc117 --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/cuda/emulation.hpp @@ -0,0 +1,269 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef OPENCV_CUDA_EMULATION_HPP_ +#define OPENCV_CUDA_EMULATION_HPP_ + +#include "common.hpp" +#include "warp_reduce.hpp" + +/** @file + * @deprecated Use @ref cudev instead. + */ + +//! @cond IGNORED + +namespace cv { namespace cuda { namespace device +{ + struct Emulation + { + + static __device__ __forceinline__ int syncthreadsOr(int pred) + { +#if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ < 200) + // just campilation stab + return 0; +#else + return __syncthreads_or(pred); +#endif + } + + template + static __forceinline__ __device__ int Ballot(int predicate) + { +#if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ >= 200) + return __ballot(predicate); +#else + __shared__ volatile int cta_buffer[CTA_SIZE]; + + int tid = threadIdx.x; + cta_buffer[tid] = predicate ? (1 << (tid & 31)) : 0; + return warp_reduce(cta_buffer); +#endif + } + + struct smem + { + enum { TAG_MASK = (1U << ( (sizeof(unsigned int) << 3) - 5U)) - 1U }; + + template + static __device__ __forceinline__ T atomicInc(T* address, T val) + { +#if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ < 120) + T count; + unsigned int tag = threadIdx.x << ( (sizeof(unsigned int) << 3) - 5U); + do + { + count = *address & TAG_MASK; + count = tag | (count + 1); + *address = count; + } while (*address != count); + + return (count & TAG_MASK) - 1; +#else + return ::atomicInc(address, val); +#endif + } + + template + static __device__ __forceinline__ T atomicAdd(T* address, T val) + { +#if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ < 120) + T count; + unsigned int tag = threadIdx.x << ( (sizeof(unsigned int) << 3) - 5U); + do + { + count = *address & TAG_MASK; + count = tag | (count + val); + *address = count; + } while (*address != count); + + return (count & TAG_MASK) - val; +#else + return ::atomicAdd(address, val); +#endif + } + + template + static __device__ __forceinline__ T atomicMin(T* address, T val) + { +#if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ < 120) + T count = ::min(*address, val); + do + { + *address = count; + } while (*address > count); + + return count; +#else + return ::atomicMin(address, val); +#endif + } + }; // struct cmem + + struct glob + { + static __device__ __forceinline__ int atomicAdd(int* address, int val) + { + return ::atomicAdd(address, val); + } + static __device__ __forceinline__ unsigned int atomicAdd(unsigned int* address, unsigned int val) + { + return ::atomicAdd(address, val); + } + static __device__ __forceinline__ float atomicAdd(float* address, float val) + { + #if __CUDA_ARCH__ >= 200 + return ::atomicAdd(address, val); + #else + int* address_as_i = (int*) address; + int old = *address_as_i, assumed; + do { + assumed = old; + old = ::atomicCAS(address_as_i, assumed, + __float_as_int(val + __int_as_float(assumed))); + } while (assumed != old); + return __int_as_float(old); + #endif + } + static __device__ __forceinline__ double atomicAdd(double* address, double val) + { + #if __CUDA_ARCH__ >= 130 + unsigned long long int* address_as_ull = (unsigned long long int*) address; + unsigned long long int old = *address_as_ull, assumed; + do { + assumed = old; + old = ::atomicCAS(address_as_ull, assumed, + __double_as_longlong(val + __longlong_as_double(assumed))); + } while (assumed != old); + return __longlong_as_double(old); + #else + CV_UNUSED(address); + CV_UNUSED(val); + return 0.0; + #endif + } + + static __device__ __forceinline__ int atomicMin(int* address, int val) + { + return ::atomicMin(address, val); + } + static __device__ __forceinline__ float atomicMin(float* address, float val) + { + #if __CUDA_ARCH__ >= 120 + int* address_as_i = (int*) address; + int old = *address_as_i, assumed; + do { + assumed = old; + old = ::atomicCAS(address_as_i, assumed, + __float_as_int(::fminf(val, __int_as_float(assumed)))); + } while (assumed != old); + return __int_as_float(old); + #else + CV_UNUSED(address); + CV_UNUSED(val); + return 0.0f; + #endif + } + static __device__ __forceinline__ double atomicMin(double* address, double val) + { + #if __CUDA_ARCH__ >= 130 + unsigned long long int* address_as_ull = (unsigned long long int*) address; + unsigned long long int old = *address_as_ull, assumed; + do { + assumed = old; + old = ::atomicCAS(address_as_ull, assumed, + __double_as_longlong(::fmin(val, __longlong_as_double(assumed)))); + } while (assumed != old); + return __longlong_as_double(old); + #else + CV_UNUSED(address); + CV_UNUSED(val); + return 0.0; + #endif + } + + static __device__ __forceinline__ int atomicMax(int* address, int val) + { + return ::atomicMax(address, val); + } + static __device__ __forceinline__ float atomicMax(float* address, float val) + { + #if __CUDA_ARCH__ >= 120 + int* address_as_i = (int*) address; + int old = *address_as_i, assumed; + do { + assumed = old; + old = ::atomicCAS(address_as_i, assumed, + __float_as_int(::fmaxf(val, __int_as_float(assumed)))); + } while (assumed != old); + return __int_as_float(old); + #else + CV_UNUSED(address); + CV_UNUSED(val); + return 0.0f; + #endif + } + static __device__ __forceinline__ double atomicMax(double* address, double val) + { + #if __CUDA_ARCH__ >= 130 + unsigned long long int* address_as_ull = (unsigned long long int*) address; + unsigned long long int old = *address_as_ull, assumed; + do { + assumed = old; + old = ::atomicCAS(address_as_ull, assumed, + __double_as_longlong(::fmax(val, __longlong_as_double(assumed)))); + } while (assumed != old); + return __longlong_as_double(old); + #else + CV_UNUSED(address); + CV_UNUSED(val); + return 0.0; + #endif + } + }; + }; //struct Emulation +}}} // namespace cv { namespace cuda { namespace cudev + +//! @endcond + +#endif /* OPENCV_CUDA_EMULATION_HPP_ */ diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/cuda/filters.hpp b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/cuda/filters.hpp new file mode 100644 index 0000000..bb94212 --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/cuda/filters.hpp @@ -0,0 +1,286 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef OPENCV_CUDA_FILTERS_HPP +#define OPENCV_CUDA_FILTERS_HPP + +#include "saturate_cast.hpp" +#include "vec_traits.hpp" +#include "vec_math.hpp" +#include "type_traits.hpp" + +/** @file + * @deprecated Use @ref cudev instead. + */ + +//! @cond IGNORED + +namespace cv { namespace cuda { namespace device +{ + template struct PointFilter + { + typedef typename Ptr2D::elem_type elem_type; + typedef float index_type; + + explicit __host__ __device__ __forceinline__ PointFilter(const Ptr2D& src_, float fx = 0.f, float fy = 0.f) + : src(src_) + { + CV_UNUSED(fx); + CV_UNUSED(fy); + } + + __device__ __forceinline__ elem_type operator ()(float y, float x) const + { + return src(__float2int_rz(y), __float2int_rz(x)); + } + + Ptr2D src; + }; + + template struct LinearFilter + { + typedef typename Ptr2D::elem_type elem_type; + typedef float index_type; + + explicit __host__ __device__ __forceinline__ LinearFilter(const Ptr2D& src_, float fx = 0.f, float fy = 0.f) + : src(src_) + { + CV_UNUSED(fx); + CV_UNUSED(fy); + } + __device__ __forceinline__ elem_type operator ()(float y, float x) const + { + typedef typename TypeVec::cn>::vec_type work_type; + + work_type out = VecTraits::all(0); + + const int x1 = __float2int_rd(x); + const int y1 = __float2int_rd(y); + const int x2 = x1 + 1; + const int y2 = y1 + 1; + + elem_type src_reg = src(y1, x1); + out = out + src_reg * ((x2 - x) * (y2 - y)); + + src_reg = src(y1, x2); + out = out + src_reg * ((x - x1) * (y2 - y)); + + src_reg = src(y2, x1); + out = out + src_reg * ((x2 - x) * (y - y1)); + + src_reg = src(y2, x2); + out = out + src_reg * ((x - x1) * (y - y1)); + + return saturate_cast(out); + } + + Ptr2D src; + }; + + template struct CubicFilter + { + typedef typename Ptr2D::elem_type elem_type; + typedef float index_type; + typedef typename TypeVec::cn>::vec_type work_type; + + explicit __host__ __device__ __forceinline__ CubicFilter(const Ptr2D& src_, float fx = 0.f, float fy = 0.f) + : src(src_) + { + CV_UNUSED(fx); + CV_UNUSED(fy); + } + + static __device__ __forceinline__ float bicubicCoeff(float x_) + { + float x = fabsf(x_); + if (x <= 1.0f) + { + return x * x * (1.5f * x - 2.5f) + 1.0f; + } + else if (x < 2.0f) + { + return x * (x * (-0.5f * x + 2.5f) - 4.0f) + 2.0f; + } + else + { + return 0.0f; + } + } + + __device__ elem_type operator ()(float y, float x) const + { + const float xmin = ::ceilf(x - 2.0f); + const float xmax = ::floorf(x + 2.0f); + + const float ymin = ::ceilf(y - 2.0f); + const float ymax = ::floorf(y + 2.0f); + + work_type sum = VecTraits::all(0); + float wsum = 0.0f; + + for (float cy = ymin; cy <= ymax; cy += 1.0f) + { + for (float cx = xmin; cx <= xmax; cx += 1.0f) + { + const float w = bicubicCoeff(x - cx) * bicubicCoeff(y - cy); + sum = sum + w * src(__float2int_rd(cy), __float2int_rd(cx)); + wsum += w; + } + } + + work_type res = (!wsum)? VecTraits::all(0) : sum / wsum; + + return saturate_cast(res); + } + + Ptr2D src; + }; + // for integer scaling + template struct IntegerAreaFilter + { + typedef typename Ptr2D::elem_type elem_type; + typedef float index_type; + + explicit __host__ __device__ __forceinline__ IntegerAreaFilter(const Ptr2D& src_, float scale_x_, float scale_y_) + : src(src_), scale_x(scale_x_), scale_y(scale_y_), scale(1.f / (scale_x * scale_y)) {} + + __device__ __forceinline__ elem_type operator ()(float y, float x) const + { + float fsx1 = x * scale_x; + float fsx2 = fsx1 + scale_x; + + int sx1 = __float2int_ru(fsx1); + int sx2 = __float2int_rd(fsx2); + + float fsy1 = y * scale_y; + float fsy2 = fsy1 + scale_y; + + int sy1 = __float2int_ru(fsy1); + int sy2 = __float2int_rd(fsy2); + + typedef typename TypeVec::cn>::vec_type work_type; + work_type out = VecTraits::all(0.f); + + for(int dy = sy1; dy < sy2; ++dy) + for(int dx = sx1; dx < sx2; ++dx) + { + out = out + src(dy, dx) * scale; + } + + return saturate_cast(out); + } + + Ptr2D src; + float scale_x, scale_y ,scale; + }; + + template struct AreaFilter + { + typedef typename Ptr2D::elem_type elem_type; + typedef float index_type; + + explicit __host__ __device__ __forceinline__ AreaFilter(const Ptr2D& src_, float scale_x_, float scale_y_) + : src(src_), scale_x(scale_x_), scale_y(scale_y_){} + + __device__ __forceinline__ elem_type operator ()(float y, float x) const + { + float fsx1 = x * scale_x; + float fsx2 = fsx1 + scale_x; + + int sx1 = __float2int_ru(fsx1); + int sx2 = __float2int_rd(fsx2); + + float fsy1 = y * scale_y; + float fsy2 = fsy1 + scale_y; + + int sy1 = __float2int_ru(fsy1); + int sy2 = __float2int_rd(fsy2); + + float scale = 1.f / (fminf(scale_x, src.width - fsx1) * fminf(scale_y, src.height - fsy1)); + + typedef typename TypeVec::cn>::vec_type work_type; + work_type out = VecTraits::all(0.f); + + for (int dy = sy1; dy < sy2; ++dy) + { + for (int dx = sx1; dx < sx2; ++dx) + out = out + src(dy, dx) * scale; + + if (sx1 > fsx1) + out = out + src(dy, (sx1 -1) ) * ((sx1 - fsx1) * scale); + + if (sx2 < fsx2) + out = out + src(dy, sx2) * ((fsx2 -sx2) * scale); + } + + if (sy1 > fsy1) + for (int dx = sx1; dx < sx2; ++dx) + out = out + src( (sy1 - 1) , dx) * ((sy1 -fsy1) * scale); + + if (sy2 < fsy2) + for (int dx = sx1; dx < sx2; ++dx) + out = out + src(sy2, dx) * ((fsy2 -sy2) * scale); + + if ((sy1 > fsy1) && (sx1 > fsx1)) + out = out + src( (sy1 - 1) , (sx1 - 1)) * ((sy1 -fsy1) * (sx1 -fsx1) * scale); + + if ((sy1 > fsy1) && (sx2 < fsx2)) + out = out + src( (sy1 - 1) , sx2) * ((sy1 -fsy1) * (fsx2 -sx2) * scale); + + if ((sy2 < fsy2) && (sx2 < fsx2)) + out = out + src(sy2, sx2) * ((fsy2 -sy2) * (fsx2 -sx2) * scale); + + if ((sy2 < fsy2) && (sx1 > fsx1)) + out = out + src(sy2, (sx1 - 1)) * ((fsy2 -sy2) * (sx1 -fsx1) * scale); + + return saturate_cast(out); + } + + Ptr2D src; + float scale_x, scale_y; + int width, haight; + }; +}}} // namespace cv { namespace cuda { namespace cudev + +//! @endcond + +#endif // OPENCV_CUDA_FILTERS_HPP diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/cuda/funcattrib.hpp b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/cuda/funcattrib.hpp new file mode 100644 index 0000000..f582080 --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/cuda/funcattrib.hpp @@ -0,0 +1,79 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef OPENCV_CUDA_DEVICE_FUNCATTRIB_HPP +#define OPENCV_CUDA_DEVICE_FUNCATTRIB_HPP + +#include + +/** @file + * @deprecated Use @ref cudev instead. + */ + +//! @cond IGNORED + +namespace cv { namespace cuda { namespace device +{ + template + void printFuncAttrib(Func& func) + { + + cudaFuncAttributes attrs; + cudaFuncGetAttributes(&attrs, func); + + printf("=== Function stats ===\n"); + printf("Name: \n"); + printf("sharedSizeBytes = %d\n", attrs.sharedSizeBytes); + printf("constSizeBytes = %d\n", attrs.constSizeBytes); + printf("localSizeBytes = %d\n", attrs.localSizeBytes); + printf("maxThreadsPerBlock = %d\n", attrs.maxThreadsPerBlock); + printf("numRegs = %d\n", attrs.numRegs); + printf("ptxVersion = %d\n", attrs.ptxVersion); + printf("binaryVersion = %d\n", attrs.binaryVersion); + printf("\n"); + fflush(stdout); + } +}}} // namespace cv { namespace cuda { namespace cudev + +//! @endcond + +#endif /* OPENCV_CUDA_DEVICE_FUNCATTRIB_HPP */ diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/cuda/functional.hpp b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/cuda/functional.hpp new file mode 100644 index 0000000..9f53d87 --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/cuda/functional.hpp @@ -0,0 +1,805 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef OPENCV_CUDA_FUNCTIONAL_HPP +#define OPENCV_CUDA_FUNCTIONAL_HPP + +#include +#include "saturate_cast.hpp" +#include "vec_traits.hpp" +#include "type_traits.hpp" + +/** @file + * @deprecated Use @ref cudev instead. + */ + +//! @cond IGNORED + +namespace cv { namespace cuda { namespace device +{ + // Function Objects + template struct unary_function + { + typedef Argument argument_type; + typedef Result result_type; + }; + template struct binary_function + { + typedef Argument1 first_argument_type; + typedef Argument2 second_argument_type; + typedef Result result_type; + }; + + // Arithmetic Operations + template struct plus : binary_function + { + __device__ __forceinline__ T operator ()(typename TypeTraits::ParameterType a, + typename TypeTraits::ParameterType b) const + { + return a + b; + } + __host__ __device__ __forceinline__ plus() {} + __host__ __device__ __forceinline__ plus(const plus&) {} + }; + + template struct minus : binary_function + { + __device__ __forceinline__ T operator ()(typename TypeTraits::ParameterType a, + typename TypeTraits::ParameterType b) const + { + return a - b; + } + __host__ __device__ __forceinline__ minus() {} + __host__ __device__ __forceinline__ minus(const minus&) {} + }; + + template struct multiplies : binary_function + { + __device__ __forceinline__ T operator ()(typename TypeTraits::ParameterType a, + typename TypeTraits::ParameterType b) const + { + return a * b; + } + __host__ __device__ __forceinline__ multiplies() {} + __host__ __device__ __forceinline__ multiplies(const multiplies&) {} + }; + + template struct divides : binary_function + { + __device__ __forceinline__ T operator ()(typename TypeTraits::ParameterType a, + typename TypeTraits::ParameterType b) const + { + return a / b; + } + __host__ __device__ __forceinline__ divides() {} + __host__ __device__ __forceinline__ divides(const divides&) {} + }; + + template struct modulus : binary_function + { + __device__ __forceinline__ T operator ()(typename TypeTraits::ParameterType a, + typename TypeTraits::ParameterType b) const + { + return a % b; + } + __host__ __device__ __forceinline__ modulus() {} + __host__ __device__ __forceinline__ modulus(const modulus&) {} + }; + + template struct negate : unary_function + { + __device__ __forceinline__ T operator ()(typename TypeTraits::ParameterType a) const + { + return -a; + } + __host__ __device__ __forceinline__ negate() {} + __host__ __device__ __forceinline__ negate(const negate&) {} + }; + + // Comparison Operations + template struct equal_to : binary_function + { + __device__ __forceinline__ bool operator ()(typename TypeTraits::ParameterType a, + typename TypeTraits::ParameterType b) const + { + return a == b; + } + __host__ __device__ __forceinline__ equal_to() {} + __host__ __device__ __forceinline__ equal_to(const equal_to&) {} + }; + + template struct not_equal_to : binary_function + { + __device__ __forceinline__ bool operator ()(typename TypeTraits::ParameterType a, + typename TypeTraits::ParameterType b) const + { + return a != b; + } + __host__ __device__ __forceinline__ not_equal_to() {} + __host__ __device__ __forceinline__ not_equal_to(const not_equal_to&) {} + }; + + template struct greater : binary_function + { + __device__ __forceinline__ bool operator ()(typename TypeTraits::ParameterType a, + typename TypeTraits::ParameterType b) const + { + return a > b; + } + __host__ __device__ __forceinline__ greater() {} + __host__ __device__ __forceinline__ greater(const greater&) {} + }; + + template struct less : binary_function + { + __device__ __forceinline__ bool operator ()(typename TypeTraits::ParameterType a, + typename TypeTraits::ParameterType b) const + { + return a < b; + } + __host__ __device__ __forceinline__ less() {} + __host__ __device__ __forceinline__ less(const less&) {} + }; + + template struct greater_equal : binary_function + { + __device__ __forceinline__ bool operator ()(typename TypeTraits::ParameterType a, + typename TypeTraits::ParameterType b) const + { + return a >= b; + } + __host__ __device__ __forceinline__ greater_equal() {} + __host__ __device__ __forceinline__ greater_equal(const greater_equal&) {} + }; + + template struct less_equal : binary_function + { + __device__ __forceinline__ bool operator ()(typename TypeTraits::ParameterType a, + typename TypeTraits::ParameterType b) const + { + return a <= b; + } + __host__ __device__ __forceinline__ less_equal() {} + __host__ __device__ __forceinline__ less_equal(const less_equal&) {} + }; + + // Logical Operations + template struct logical_and : binary_function + { + __device__ __forceinline__ bool operator ()(typename TypeTraits::ParameterType a, + typename TypeTraits::ParameterType b) const + { + return a && b; + } + __host__ __device__ __forceinline__ logical_and() {} + __host__ __device__ __forceinline__ logical_and(const logical_and&) {} + }; + + template struct logical_or : binary_function + { + __device__ __forceinline__ bool operator ()(typename TypeTraits::ParameterType a, + typename TypeTraits::ParameterType b) const + { + return a || b; + } + __host__ __device__ __forceinline__ logical_or() {} + __host__ __device__ __forceinline__ logical_or(const logical_or&) {} + }; + + template struct logical_not : unary_function + { + __device__ __forceinline__ bool operator ()(typename TypeTraits::ParameterType a) const + { + return !a; + } + __host__ __device__ __forceinline__ logical_not() {} + __host__ __device__ __forceinline__ logical_not(const logical_not&) {} + }; + + // Bitwise Operations + template struct bit_and : binary_function + { + __device__ __forceinline__ T operator ()(typename TypeTraits::ParameterType a, + typename TypeTraits::ParameterType b) const + { + return a & b; + } + __host__ __device__ __forceinline__ bit_and() {} + __host__ __device__ __forceinline__ bit_and(const bit_and&) {} + }; + + template struct bit_or : binary_function + { + __device__ __forceinline__ T operator ()(typename TypeTraits::ParameterType a, + typename TypeTraits::ParameterType b) const + { + return a | b; + } + __host__ __device__ __forceinline__ bit_or() {} + __host__ __device__ __forceinline__ bit_or(const bit_or&) {} + }; + + template struct bit_xor : binary_function + { + __device__ __forceinline__ T operator ()(typename TypeTraits::ParameterType a, + typename TypeTraits::ParameterType b) const + { + return a ^ b; + } + __host__ __device__ __forceinline__ bit_xor() {} + __host__ __device__ __forceinline__ bit_xor(const bit_xor&) {} + }; + + template struct bit_not : unary_function + { + __device__ __forceinline__ T operator ()(typename TypeTraits::ParameterType v) const + { + return ~v; + } + __host__ __device__ __forceinline__ bit_not() {} + __host__ __device__ __forceinline__ bit_not(const bit_not&) {} + }; + + // Generalized Identity Operations + template struct identity : unary_function + { + __device__ __forceinline__ typename TypeTraits::ParameterType operator()(typename TypeTraits::ParameterType x) const + { + return x; + } + __host__ __device__ __forceinline__ identity() {} + __host__ __device__ __forceinline__ identity(const identity&) {} + }; + + template struct project1st : binary_function + { + __device__ __forceinline__ typename TypeTraits::ParameterType operator()(typename TypeTraits::ParameterType lhs, typename TypeTraits::ParameterType rhs) const + { + return lhs; + } + __host__ __device__ __forceinline__ project1st() {} + __host__ __device__ __forceinline__ project1st(const project1st&) {} + }; + + template struct project2nd : binary_function + { + __device__ __forceinline__ typename TypeTraits::ParameterType operator()(typename TypeTraits::ParameterType lhs, typename TypeTraits::ParameterType rhs) const + { + return rhs; + } + __host__ __device__ __forceinline__ project2nd() {} + __host__ __device__ __forceinline__ project2nd(const project2nd&) {} + }; + + // Min/Max Operations + +#define OPENCV_CUDA_IMPLEMENT_MINMAX(name, type, op) \ + template <> struct name : binary_function \ + { \ + __device__ __forceinline__ type operator()(type lhs, type rhs) const {return op(lhs, rhs);} \ + __host__ __device__ __forceinline__ name() {}\ + __host__ __device__ __forceinline__ name(const name&) {}\ + }; + + template struct maximum : binary_function + { + __device__ __forceinline__ T operator()(typename TypeTraits::ParameterType lhs, typename TypeTraits::ParameterType rhs) const + { + return max(lhs, rhs); + } + __host__ __device__ __forceinline__ maximum() {} + __host__ __device__ __forceinline__ maximum(const maximum&) {} + }; + + OPENCV_CUDA_IMPLEMENT_MINMAX(maximum, uchar, ::max) + OPENCV_CUDA_IMPLEMENT_MINMAX(maximum, schar, ::max) + OPENCV_CUDA_IMPLEMENT_MINMAX(maximum, char, ::max) + OPENCV_CUDA_IMPLEMENT_MINMAX(maximum, ushort, ::max) + OPENCV_CUDA_IMPLEMENT_MINMAX(maximum, short, ::max) + OPENCV_CUDA_IMPLEMENT_MINMAX(maximum, int, ::max) + OPENCV_CUDA_IMPLEMENT_MINMAX(maximum, uint, ::max) + OPENCV_CUDA_IMPLEMENT_MINMAX(maximum, float, ::fmax) + OPENCV_CUDA_IMPLEMENT_MINMAX(maximum, double, ::fmax) + + template struct minimum : binary_function + { + __device__ __forceinline__ T operator()(typename TypeTraits::ParameterType lhs, typename TypeTraits::ParameterType rhs) const + { + return min(lhs, rhs); + } + __host__ __device__ __forceinline__ minimum() {} + __host__ __device__ __forceinline__ minimum(const minimum&) {} + }; + + OPENCV_CUDA_IMPLEMENT_MINMAX(minimum, uchar, ::min) + OPENCV_CUDA_IMPLEMENT_MINMAX(minimum, schar, ::min) + OPENCV_CUDA_IMPLEMENT_MINMAX(minimum, char, ::min) + OPENCV_CUDA_IMPLEMENT_MINMAX(minimum, ushort, ::min) + OPENCV_CUDA_IMPLEMENT_MINMAX(minimum, short, ::min) + OPENCV_CUDA_IMPLEMENT_MINMAX(minimum, int, ::min) + OPENCV_CUDA_IMPLEMENT_MINMAX(minimum, uint, ::min) + OPENCV_CUDA_IMPLEMENT_MINMAX(minimum, float, ::fmin) + OPENCV_CUDA_IMPLEMENT_MINMAX(minimum, double, ::fmin) + +#undef OPENCV_CUDA_IMPLEMENT_MINMAX + + // Math functions + + template struct abs_func : unary_function + { + __device__ __forceinline__ T operator ()(typename TypeTraits::ParameterType x) const + { + return abs(x); + } + + __host__ __device__ __forceinline__ abs_func() {} + __host__ __device__ __forceinline__ abs_func(const abs_func&) {} + }; + template <> struct abs_func : unary_function + { + __device__ __forceinline__ unsigned char operator ()(unsigned char x) const + { + return x; + } + + __host__ __device__ __forceinline__ abs_func() {} + __host__ __device__ __forceinline__ abs_func(const abs_func&) {} + }; + template <> struct abs_func : unary_function + { + __device__ __forceinline__ signed char operator ()(signed char x) const + { + return ::abs((int)x); + } + + __host__ __device__ __forceinline__ abs_func() {} + __host__ __device__ __forceinline__ abs_func(const abs_func&) {} + }; + template <> struct abs_func : unary_function + { + __device__ __forceinline__ char operator ()(char x) const + { + return ::abs((int)x); + } + + __host__ __device__ __forceinline__ abs_func() {} + __host__ __device__ __forceinline__ abs_func(const abs_func&) {} + }; + template <> struct abs_func : unary_function + { + __device__ __forceinline__ unsigned short operator ()(unsigned short x) const + { + return x; + } + + __host__ __device__ __forceinline__ abs_func() {} + __host__ __device__ __forceinline__ abs_func(const abs_func&) {} + }; + template <> struct abs_func : unary_function + { + __device__ __forceinline__ short operator ()(short x) const + { + return ::abs((int)x); + } + + __host__ __device__ __forceinline__ abs_func() {} + __host__ __device__ __forceinline__ abs_func(const abs_func&) {} + }; + template <> struct abs_func : unary_function + { + __device__ __forceinline__ unsigned int operator ()(unsigned int x) const + { + return x; + } + + __host__ __device__ __forceinline__ abs_func() {} + __host__ __device__ __forceinline__ abs_func(const abs_func&) {} + }; + template <> struct abs_func : unary_function + { + __device__ __forceinline__ int operator ()(int x) const + { + return ::abs(x); + } + + __host__ __device__ __forceinline__ abs_func() {} + __host__ __device__ __forceinline__ abs_func(const abs_func&) {} + }; + template <> struct abs_func : unary_function + { + __device__ __forceinline__ float operator ()(float x) const + { + return ::fabsf(x); + } + + __host__ __device__ __forceinline__ abs_func() {} + __host__ __device__ __forceinline__ abs_func(const abs_func&) {} + }; + template <> struct abs_func : unary_function + { + __device__ __forceinline__ double operator ()(double x) const + { + return ::fabs(x); + } + + __host__ __device__ __forceinline__ abs_func() {} + __host__ __device__ __forceinline__ abs_func(const abs_func&) {} + }; + +#define OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(name, func) \ + template struct name ## _func : unary_function \ + { \ + __device__ __forceinline__ float operator ()(typename TypeTraits::ParameterType v) const \ + { \ + return func ## f(v); \ + } \ + __host__ __device__ __forceinline__ name ## _func() {} \ + __host__ __device__ __forceinline__ name ## _func(const name ## _func&) {} \ + }; \ + template <> struct name ## _func : unary_function \ + { \ + __device__ __forceinline__ double operator ()(double v) const \ + { \ + return func(v); \ + } \ + __host__ __device__ __forceinline__ name ## _func() {} \ + __host__ __device__ __forceinline__ name ## _func(const name ## _func&) {} \ + }; + +#define OPENCV_CUDA_IMPLEMENT_BIN_FUNCTOR(name, func) \ + template struct name ## _func : binary_function \ + { \ + __device__ __forceinline__ float operator ()(typename TypeTraits::ParameterType v1, typename TypeTraits::ParameterType v2) const \ + { \ + return func ## f(v1, v2); \ + } \ + __host__ __device__ __forceinline__ name ## _func() {} \ + __host__ __device__ __forceinline__ name ## _func(const name ## _func&) {} \ + }; \ + template <> struct name ## _func : binary_function \ + { \ + __device__ __forceinline__ double operator ()(double v1, double v2) const \ + { \ + return func(v1, v2); \ + } \ + __host__ __device__ __forceinline__ name ## _func() {} \ + __host__ __device__ __forceinline__ name ## _func(const name ## _func&) {} \ + }; + + OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(sqrt, ::sqrt) + OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(exp, ::exp) + OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(exp2, ::exp2) + OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(exp10, ::exp10) + OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(log, ::log) + OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(log2, ::log2) + OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(log10, ::log10) + OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(sin, ::sin) + OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(cos, ::cos) + OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(tan, ::tan) + OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(asin, ::asin) + OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(acos, ::acos) + OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(atan, ::atan) + OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(sinh, ::sinh) + OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(cosh, ::cosh) + OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(tanh, ::tanh) + OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(asinh, ::asinh) + OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(acosh, ::acosh) + OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(atanh, ::atanh) + + OPENCV_CUDA_IMPLEMENT_BIN_FUNCTOR(hypot, ::hypot) + OPENCV_CUDA_IMPLEMENT_BIN_FUNCTOR(atan2, ::atan2) + OPENCV_CUDA_IMPLEMENT_BIN_FUNCTOR(pow, ::pow) + + #undef OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR + #undef OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR_NO_DOUBLE + #undef OPENCV_CUDA_IMPLEMENT_BIN_FUNCTOR + + template struct hypot_sqr_func : binary_function + { + __device__ __forceinline__ T operator ()(typename TypeTraits::ParameterType src1, typename TypeTraits::ParameterType src2) const + { + return src1 * src1 + src2 * src2; + } + __host__ __device__ __forceinline__ hypot_sqr_func() {} + __host__ __device__ __forceinline__ hypot_sqr_func(const hypot_sqr_func&) {} + }; + + // Saturate Cast Functor + template struct saturate_cast_func : unary_function + { + __device__ __forceinline__ D operator ()(typename TypeTraits::ParameterType v) const + { + return saturate_cast(v); + } + __host__ __device__ __forceinline__ saturate_cast_func() {} + __host__ __device__ __forceinline__ saturate_cast_func(const saturate_cast_func&) {} + }; + + // Threshold Functors + template struct thresh_binary_func : unary_function + { + __host__ __device__ __forceinline__ thresh_binary_func(T thresh_, T maxVal_) : thresh(thresh_), maxVal(maxVal_) {} + + __device__ __forceinline__ T operator()(typename TypeTraits::ParameterType src) const + { + return (src > thresh) * maxVal; + } + + __host__ __device__ __forceinline__ thresh_binary_func() {} + __host__ __device__ __forceinline__ thresh_binary_func(const thresh_binary_func& other) + : thresh(other.thresh), maxVal(other.maxVal) {} + + T thresh; + T maxVal; + }; + + template struct thresh_binary_inv_func : unary_function + { + __host__ __device__ __forceinline__ thresh_binary_inv_func(T thresh_, T maxVal_) : thresh(thresh_), maxVal(maxVal_) {} + + __device__ __forceinline__ T operator()(typename TypeTraits::ParameterType src) const + { + return (src <= thresh) * maxVal; + } + + __host__ __device__ __forceinline__ thresh_binary_inv_func() {} + __host__ __device__ __forceinline__ thresh_binary_inv_func(const thresh_binary_inv_func& other) + : thresh(other.thresh), maxVal(other.maxVal) {} + + T thresh; + T maxVal; + }; + + template struct thresh_trunc_func : unary_function + { + explicit __host__ __device__ __forceinline__ thresh_trunc_func(T thresh_, T maxVal_ = 0) : thresh(thresh_) {CV_UNUSED(maxVal_);} + + __device__ __forceinline__ T operator()(typename TypeTraits::ParameterType src) const + { + return minimum()(src, thresh); + } + + __host__ __device__ __forceinline__ thresh_trunc_func() {} + __host__ __device__ __forceinline__ thresh_trunc_func(const thresh_trunc_func& other) + : thresh(other.thresh) {} + + T thresh; + }; + + template struct thresh_to_zero_func : unary_function + { + explicit __host__ __device__ __forceinline__ thresh_to_zero_func(T thresh_, T maxVal_ = 0) : thresh(thresh_) {CV_UNUSED(maxVal_);} + + __device__ __forceinline__ T operator()(typename TypeTraits::ParameterType src) const + { + return (src > thresh) * src; + } + + __host__ __device__ __forceinline__ thresh_to_zero_func() {} + __host__ __device__ __forceinline__ thresh_to_zero_func(const thresh_to_zero_func& other) + : thresh(other.thresh) {} + + T thresh; + }; + + template struct thresh_to_zero_inv_func : unary_function + { + explicit __host__ __device__ __forceinline__ thresh_to_zero_inv_func(T thresh_, T maxVal_ = 0) : thresh(thresh_) {CV_UNUSED(maxVal_);} + + __device__ __forceinline__ T operator()(typename TypeTraits::ParameterType src) const + { + return (src <= thresh) * src; + } + + __host__ __device__ __forceinline__ thresh_to_zero_inv_func() {} + __host__ __device__ __forceinline__ thresh_to_zero_inv_func(const thresh_to_zero_inv_func& other) + : thresh(other.thresh) {} + + T thresh; + }; + + // Function Object Adaptors + template struct unary_negate : unary_function + { + explicit __host__ __device__ __forceinline__ unary_negate(const Predicate& p) : pred(p) {} + + __device__ __forceinline__ bool operator()(typename TypeTraits::ParameterType x) const + { + return !pred(x); + } + + __host__ __device__ __forceinline__ unary_negate() {} + __host__ __device__ __forceinline__ unary_negate(const unary_negate& other) : pred(other.pred) {} + + Predicate pred; + }; + + template __host__ __device__ __forceinline__ unary_negate not1(const Predicate& pred) + { + return unary_negate(pred); + } + + template struct binary_negate : binary_function + { + explicit __host__ __device__ __forceinline__ binary_negate(const Predicate& p) : pred(p) {} + + __device__ __forceinline__ bool operator()(typename TypeTraits::ParameterType x, + typename TypeTraits::ParameterType y) const + { + return !pred(x,y); + } + + __host__ __device__ __forceinline__ binary_negate() {} + __host__ __device__ __forceinline__ binary_negate(const binary_negate& other) : pred(other.pred) {} + + Predicate pred; + }; + + template __host__ __device__ __forceinline__ binary_negate not2(const BinaryPredicate& pred) + { + return binary_negate(pred); + } + + template struct binder1st : unary_function + { + __host__ __device__ __forceinline__ binder1st(const Op& op_, const typename Op::first_argument_type& arg1_) : op(op_), arg1(arg1_) {} + + __device__ __forceinline__ typename Op::result_type operator ()(typename TypeTraits::ParameterType a) const + { + return op(arg1, a); + } + + __host__ __device__ __forceinline__ binder1st() {} + __host__ __device__ __forceinline__ binder1st(const binder1st& other) : op(other.op), arg1(other.arg1) {} + + Op op; + typename Op::first_argument_type arg1; + }; + + template __host__ __device__ __forceinline__ binder1st bind1st(const Op& op, const T& x) + { + return binder1st(op, typename Op::first_argument_type(x)); + } + + template struct binder2nd : unary_function + { + __host__ __device__ __forceinline__ binder2nd(const Op& op_, const typename Op::second_argument_type& arg2_) : op(op_), arg2(arg2_) {} + + __forceinline__ __device__ typename Op::result_type operator ()(typename TypeTraits::ParameterType a) const + { + return op(a, arg2); + } + + __host__ __device__ __forceinline__ binder2nd() {} + __host__ __device__ __forceinline__ binder2nd(const binder2nd& other) : op(other.op), arg2(other.arg2) {} + + Op op; + typename Op::second_argument_type arg2; + }; + + template __host__ __device__ __forceinline__ binder2nd bind2nd(const Op& op, const T& x) + { + return binder2nd(op, typename Op::second_argument_type(x)); + } + + // Functor Traits + template struct IsUnaryFunction + { + typedef char Yes; + struct No {Yes a[2];}; + + template static Yes check(unary_function); + static No check(...); + + static F makeF(); + + enum { value = (sizeof(check(makeF())) == sizeof(Yes)) }; + }; + + template struct IsBinaryFunction + { + typedef char Yes; + struct No {Yes a[2];}; + + template static Yes check(binary_function); + static No check(...); + + static F makeF(); + + enum { value = (sizeof(check(makeF())) == sizeof(Yes)) }; + }; + + namespace functional_detail + { + template struct UnOpShift { enum { shift = 1 }; }; + template struct UnOpShift { enum { shift = 4 }; }; + template struct UnOpShift { enum { shift = 2 }; }; + + template struct DefaultUnaryShift + { + enum { shift = UnOpShift::shift }; + }; + + template struct BinOpShift { enum { shift = 1 }; }; + template struct BinOpShift { enum { shift = 4 }; }; + template struct BinOpShift { enum { shift = 2 }; }; + + template struct DefaultBinaryShift + { + enum { shift = BinOpShift::shift }; + }; + + template ::value> struct ShiftDispatcher; + template struct ShiftDispatcher + { + enum { shift = DefaultUnaryShift::shift }; + }; + template struct ShiftDispatcher + { + enum { shift = DefaultBinaryShift::shift }; + }; + } + + template struct DefaultTransformShift + { + enum { shift = functional_detail::ShiftDispatcher::shift }; + }; + + template struct DefaultTransformFunctorTraits + { + enum { simple_block_dim_x = 16 }; + enum { simple_block_dim_y = 16 }; + + enum { smart_block_dim_x = 16 }; + enum { smart_block_dim_y = 16 }; + enum { smart_shift = DefaultTransformShift::shift }; + }; + + template struct TransformFunctorTraits : DefaultTransformFunctorTraits {}; + +#define OPENCV_CUDA_TRANSFORM_FUNCTOR_TRAITS(type) \ + template <> struct TransformFunctorTraits< type > : DefaultTransformFunctorTraits< type > +}}} // namespace cv { namespace cuda { namespace cudev + +//! @endcond + +#endif // OPENCV_CUDA_FUNCTIONAL_HPP diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/cuda/limits.hpp b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/cuda/limits.hpp new file mode 100644 index 0000000..7e15ed6 --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/cuda/limits.hpp @@ -0,0 +1,128 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef OPENCV_CUDA_LIMITS_HPP +#define OPENCV_CUDA_LIMITS_HPP + +#include +#include +#include "common.hpp" + +/** @file + * @deprecated Use @ref cudev instead. + */ + +//! @cond IGNORED + +namespace cv { namespace cuda { namespace device +{ +template struct numeric_limits; + +template <> struct numeric_limits +{ + __device__ __forceinline__ static bool min() { return false; } + __device__ __forceinline__ static bool max() { return true; } + static const bool is_signed = false; +}; + +template <> struct numeric_limits +{ + __device__ __forceinline__ static signed char min() { return SCHAR_MIN; } + __device__ __forceinline__ static signed char max() { return SCHAR_MAX; } + static const bool is_signed = true; +}; + +template <> struct numeric_limits +{ + __device__ __forceinline__ static unsigned char min() { return 0; } + __device__ __forceinline__ static unsigned char max() { return UCHAR_MAX; } + static const bool is_signed = false; +}; + +template <> struct numeric_limits +{ + __device__ __forceinline__ static short min() { return SHRT_MIN; } + __device__ __forceinline__ static short max() { return SHRT_MAX; } + static const bool is_signed = true; +}; + +template <> struct numeric_limits +{ + __device__ __forceinline__ static unsigned short min() { return 0; } + __device__ __forceinline__ static unsigned short max() { return USHRT_MAX; } + static const bool is_signed = false; +}; + +template <> struct numeric_limits +{ + __device__ __forceinline__ static int min() { return INT_MIN; } + __device__ __forceinline__ static int max() { return INT_MAX; } + static const bool is_signed = true; +}; + +template <> struct numeric_limits +{ + __device__ __forceinline__ static unsigned int min() { return 0; } + __device__ __forceinline__ static unsigned int max() { return UINT_MAX; } + static const bool is_signed = false; +}; + +template <> struct numeric_limits +{ + __device__ __forceinline__ static float min() { return FLT_MIN; } + __device__ __forceinline__ static float max() { return FLT_MAX; } + __device__ __forceinline__ static float epsilon() { return FLT_EPSILON; } + static const bool is_signed = true; +}; + +template <> struct numeric_limits +{ + __device__ __forceinline__ static double min() { return DBL_MIN; } + __device__ __forceinline__ static double max() { return DBL_MAX; } + __device__ __forceinline__ static double epsilon() { return DBL_EPSILON; } + static const bool is_signed = true; +}; +}}} // namespace cv { namespace cuda { namespace cudev { + +//! @endcond + +#endif // OPENCV_CUDA_LIMITS_HPP diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/cuda/reduce.hpp b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/cuda/reduce.hpp new file mode 100644 index 0000000..5de3650 --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/cuda/reduce.hpp @@ -0,0 +1,209 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef OPENCV_CUDA_REDUCE_HPP +#define OPENCV_CUDA_REDUCE_HPP + +#ifndef THRUST_DEBUG // eliminate -Wundef warning +#define THRUST_DEBUG 0 +#endif + +#include +#include "detail/reduce.hpp" +#include "detail/reduce_key_val.hpp" + +/** @file + * @deprecated Use @ref cudev instead. + */ + +//! @cond IGNORED + +namespace cv { namespace cuda { namespace device +{ + template + __device__ __forceinline__ void reduce(volatile T* smem, T& val, unsigned int tid, const Op& op) + { + reduce_detail::Dispatcher::reductor::template reduce(smem, val, tid, op); + } + template + __device__ __forceinline__ void reduce(const thrust::tuple& smem, + const thrust::tuple& val, + unsigned int tid, + const thrust::tuple& op) + { + reduce_detail::Dispatcher::reductor::template reduce< + const thrust::tuple&, + const thrust::tuple&, + const thrust::tuple&>(smem, val, tid, op); + } + + template + __device__ __forceinline__ void reduceKeyVal(volatile K* skeys, K& key, volatile V* svals, V& val, unsigned int tid, const Cmp& cmp) + { + reduce_key_val_detail::Dispatcher::reductor::template reduce(skeys, key, svals, val, tid, cmp); + } + template + __device__ __forceinline__ void reduceKeyVal(volatile K* skeys, K& key, + const thrust::tuple& svals, + const thrust::tuple& val, + unsigned int tid, const Cmp& cmp) + { + reduce_key_val_detail::Dispatcher::reductor::template reduce&, + const thrust::tuple&, + const Cmp&>(skeys, key, svals, val, tid, cmp); + } + template + __device__ __forceinline__ void reduceKeyVal(const thrust::tuple& skeys, + const thrust::tuple& key, + const thrust::tuple& svals, + const thrust::tuple& val, + unsigned int tid, + const thrust::tuple& cmp) + { + reduce_key_val_detail::Dispatcher::reductor::template reduce< + const thrust::tuple&, + const thrust::tuple&, + const thrust::tuple&, + const thrust::tuple&, + const thrust::tuple& + >(skeys, key, svals, val, tid, cmp); + } + + // smem_tuple + + template + __device__ __forceinline__ + thrust::tuple + smem_tuple(T0* t0) + { + return thrust::make_tuple((volatile T0*) t0); + } + + template + __device__ __forceinline__ + thrust::tuple + smem_tuple(T0* t0, T1* t1) + { + return thrust::make_tuple((volatile T0*) t0, (volatile T1*) t1); + } + + template + __device__ __forceinline__ + thrust::tuple + smem_tuple(T0* t0, T1* t1, T2* t2) + { + return thrust::make_tuple((volatile T0*) t0, (volatile T1*) t1, (volatile T2*) t2); + } + + template + __device__ __forceinline__ + thrust::tuple + smem_tuple(T0* t0, T1* t1, T2* t2, T3* t3) + { + return thrust::make_tuple((volatile T0*) t0, (volatile T1*) t1, (volatile T2*) t2, (volatile T3*) t3); + } + + template + __device__ __forceinline__ + thrust::tuple + smem_tuple(T0* t0, T1* t1, T2* t2, T3* t3, T4* t4) + { + return thrust::make_tuple((volatile T0*) t0, (volatile T1*) t1, (volatile T2*) t2, (volatile T3*) t3, (volatile T4*) t4); + } + + template + __device__ __forceinline__ + thrust::tuple + smem_tuple(T0* t0, T1* t1, T2* t2, T3* t3, T4* t4, T5* t5) + { + return thrust::make_tuple((volatile T0*) t0, (volatile T1*) t1, (volatile T2*) t2, (volatile T3*) t3, (volatile T4*) t4, (volatile T5*) t5); + } + + template + __device__ __forceinline__ + thrust::tuple + smem_tuple(T0* t0, T1* t1, T2* t2, T3* t3, T4* t4, T5* t5, T6* t6) + { + return thrust::make_tuple((volatile T0*) t0, (volatile T1*) t1, (volatile T2*) t2, (volatile T3*) t3, (volatile T4*) t4, (volatile T5*) t5, (volatile T6*) t6); + } + + template + __device__ __forceinline__ + thrust::tuple + smem_tuple(T0* t0, T1* t1, T2* t2, T3* t3, T4* t4, T5* t5, T6* t6, T7* t7) + { + return thrust::make_tuple((volatile T0*) t0, (volatile T1*) t1, (volatile T2*) t2, (volatile T3*) t3, (volatile T4*) t4, (volatile T5*) t5, (volatile T6*) t6, (volatile T7*) t7); + } + + template + __device__ __forceinline__ + thrust::tuple + smem_tuple(T0* t0, T1* t1, T2* t2, T3* t3, T4* t4, T5* t5, T6* t6, T7* t7, T8* t8) + { + return thrust::make_tuple((volatile T0*) t0, (volatile T1*) t1, (volatile T2*) t2, (volatile T3*) t3, (volatile T4*) t4, (volatile T5*) t5, (volatile T6*) t6, (volatile T7*) t7, (volatile T8*) t8); + } + + template + __device__ __forceinline__ + thrust::tuple + smem_tuple(T0* t0, T1* t1, T2* t2, T3* t3, T4* t4, T5* t5, T6* t6, T7* t7, T8* t8, T9* t9) + { + return thrust::make_tuple((volatile T0*) t0, (volatile T1*) t1, (volatile T2*) t2, (volatile T3*) t3, (volatile T4*) t4, (volatile T5*) t5, (volatile T6*) t6, (volatile T7*) t7, (volatile T8*) t8, (volatile T9*) t9); + } +}}} + +//! @endcond + +#endif // OPENCV_CUDA_REDUCE_HPP diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/cuda/saturate_cast.hpp b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/cuda/saturate_cast.hpp new file mode 100644 index 0000000..c3a3d1c --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/cuda/saturate_cast.hpp @@ -0,0 +1,292 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef OPENCV_CUDA_SATURATE_CAST_HPP +#define OPENCV_CUDA_SATURATE_CAST_HPP + +#include "common.hpp" + +/** @file + * @deprecated Use @ref cudev instead. + */ + +//! @cond IGNORED + +namespace cv { namespace cuda { namespace device +{ + template __device__ __forceinline__ _Tp saturate_cast(uchar v) { return _Tp(v); } + template __device__ __forceinline__ _Tp saturate_cast(schar v) { return _Tp(v); } + template __device__ __forceinline__ _Tp saturate_cast(ushort v) { return _Tp(v); } + template __device__ __forceinline__ _Tp saturate_cast(short v) { return _Tp(v); } + template __device__ __forceinline__ _Tp saturate_cast(uint v) { return _Tp(v); } + template __device__ __forceinline__ _Tp saturate_cast(int v) { return _Tp(v); } + template __device__ __forceinline__ _Tp saturate_cast(float v) { return _Tp(v); } + template __device__ __forceinline__ _Tp saturate_cast(double v) { return _Tp(v); } + + template<> __device__ __forceinline__ uchar saturate_cast(schar v) + { + uint res = 0; + int vi = v; + asm("cvt.sat.u8.s8 %0, %1;" : "=r"(res) : "r"(vi)); + return res; + } + template<> __device__ __forceinline__ uchar saturate_cast(short v) + { + uint res = 0; + asm("cvt.sat.u8.s16 %0, %1;" : "=r"(res) : "h"(v)); + return res; + } + template<> __device__ __forceinline__ uchar saturate_cast(ushort v) + { + uint res = 0; + asm("cvt.sat.u8.u16 %0, %1;" : "=r"(res) : "h"(v)); + return res; + } + template<> __device__ __forceinline__ uchar saturate_cast(int v) + { + uint res = 0; + asm("cvt.sat.u8.s32 %0, %1;" : "=r"(res) : "r"(v)); + return res; + } + template<> __device__ __forceinline__ uchar saturate_cast(uint v) + { + uint res = 0; + asm("cvt.sat.u8.u32 %0, %1;" : "=r"(res) : "r"(v)); + return res; + } + template<> __device__ __forceinline__ uchar saturate_cast(float v) + { + uint res = 0; + asm("cvt.rni.sat.u8.f32 %0, %1;" : "=r"(res) : "f"(v)); + return res; + } + template<> __device__ __forceinline__ uchar saturate_cast(double v) + { + #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 130 + uint res = 0; + asm("cvt.rni.sat.u8.f64 %0, %1;" : "=r"(res) : "d"(v)); + return res; + #else + return saturate_cast((float)v); + #endif + } + + template<> __device__ __forceinline__ schar saturate_cast(uchar v) + { + uint res = 0; + uint vi = v; + asm("cvt.sat.s8.u8 %0, %1;" : "=r"(res) : "r"(vi)); + return res; + } + template<> __device__ __forceinline__ schar saturate_cast(short v) + { + uint res = 0; + asm("cvt.sat.s8.s16 %0, %1;" : "=r"(res) : "h"(v)); + return res; + } + template<> __device__ __forceinline__ schar saturate_cast(ushort v) + { + uint res = 0; + asm("cvt.sat.s8.u16 %0, %1;" : "=r"(res) : "h"(v)); + return res; + } + template<> __device__ __forceinline__ schar saturate_cast(int v) + { + uint res = 0; + asm("cvt.sat.s8.s32 %0, %1;" : "=r"(res) : "r"(v)); + return res; + } + template<> __device__ __forceinline__ schar saturate_cast(uint v) + { + uint res = 0; + asm("cvt.sat.s8.u32 %0, %1;" : "=r"(res) : "r"(v)); + return res; + } + template<> __device__ __forceinline__ schar saturate_cast(float v) + { + uint res = 0; + asm("cvt.rni.sat.s8.f32 %0, %1;" : "=r"(res) : "f"(v)); + return res; + } + template<> __device__ __forceinline__ schar saturate_cast(double v) + { + #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 130 + uint res = 0; + asm("cvt.rni.sat.s8.f64 %0, %1;" : "=r"(res) : "d"(v)); + return res; + #else + return saturate_cast((float)v); + #endif + } + + template<> __device__ __forceinline__ ushort saturate_cast(schar v) + { + ushort res = 0; + int vi = v; + asm("cvt.sat.u16.s8 %0, %1;" : "=h"(res) : "r"(vi)); + return res; + } + template<> __device__ __forceinline__ ushort saturate_cast(short v) + { + ushort res = 0; + asm("cvt.sat.u16.s16 %0, %1;" : "=h"(res) : "h"(v)); + return res; + } + template<> __device__ __forceinline__ ushort saturate_cast(int v) + { + ushort res = 0; + asm("cvt.sat.u16.s32 %0, %1;" : "=h"(res) : "r"(v)); + return res; + } + template<> __device__ __forceinline__ ushort saturate_cast(uint v) + { + ushort res = 0; + asm("cvt.sat.u16.u32 %0, %1;" : "=h"(res) : "r"(v)); + return res; + } + template<> __device__ __forceinline__ ushort saturate_cast(float v) + { + ushort res = 0; + asm("cvt.rni.sat.u16.f32 %0, %1;" : "=h"(res) : "f"(v)); + return res; + } + template<> __device__ __forceinline__ ushort saturate_cast(double v) + { + #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 130 + ushort res = 0; + asm("cvt.rni.sat.u16.f64 %0, %1;" : "=h"(res) : "d"(v)); + return res; + #else + return saturate_cast((float)v); + #endif + } + + template<> __device__ __forceinline__ short saturate_cast(ushort v) + { + short res = 0; + asm("cvt.sat.s16.u16 %0, %1;" : "=h"(res) : "h"(v)); + return res; + } + template<> __device__ __forceinline__ short saturate_cast(int v) + { + short res = 0; + asm("cvt.sat.s16.s32 %0, %1;" : "=h"(res) : "r"(v)); + return res; + } + template<> __device__ __forceinline__ short saturate_cast(uint v) + { + short res = 0; + asm("cvt.sat.s16.u32 %0, %1;" : "=h"(res) : "r"(v)); + return res; + } + template<> __device__ __forceinline__ short saturate_cast(float v) + { + short res = 0; + asm("cvt.rni.sat.s16.f32 %0, %1;" : "=h"(res) : "f"(v)); + return res; + } + template<> __device__ __forceinline__ short saturate_cast(double v) + { + #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 130 + short res = 0; + asm("cvt.rni.sat.s16.f64 %0, %1;" : "=h"(res) : "d"(v)); + return res; + #else + return saturate_cast((float)v); + #endif + } + + template<> __device__ __forceinline__ int saturate_cast(uint v) + { + int res = 0; + asm("cvt.sat.s32.u32 %0, %1;" : "=r"(res) : "r"(v)); + return res; + } + template<> __device__ __forceinline__ int saturate_cast(float v) + { + return __float2int_rn(v); + } + template<> __device__ __forceinline__ int saturate_cast(double v) + { + #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 130 + return __double2int_rn(v); + #else + return saturate_cast((float)v); + #endif + } + + template<> __device__ __forceinline__ uint saturate_cast(schar v) + { + uint res = 0; + int vi = v; + asm("cvt.sat.u32.s8 %0, %1;" : "=r"(res) : "r"(vi)); + return res; + } + template<> __device__ __forceinline__ uint saturate_cast(short v) + { + uint res = 0; + asm("cvt.sat.u32.s16 %0, %1;" : "=r"(res) : "h"(v)); + return res; + } + template<> __device__ __forceinline__ uint saturate_cast(int v) + { + uint res = 0; + asm("cvt.sat.u32.s32 %0, %1;" : "=r"(res) : "r"(v)); + return res; + } + template<> __device__ __forceinline__ uint saturate_cast(float v) + { + return __float2uint_rn(v); + } + template<> __device__ __forceinline__ uint saturate_cast(double v) + { + #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 130 + return __double2uint_rn(v); + #else + return saturate_cast((float)v); + #endif + } +}}} + +//! @endcond + +#endif /* OPENCV_CUDA_SATURATE_CAST_HPP */ diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/cuda/scan.hpp b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/cuda/scan.hpp new file mode 100644 index 0000000..e128fb0 --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/cuda/scan.hpp @@ -0,0 +1,258 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef OPENCV_CUDA_SCAN_HPP +#define OPENCV_CUDA_SCAN_HPP + +#include "opencv2/core/cuda/common.hpp" +#include "opencv2/core/cuda/utility.hpp" +#include "opencv2/core/cuda/warp.hpp" +#include "opencv2/core/cuda/warp_shuffle.hpp" + +/** @file + * @deprecated Use @ref cudev instead. + */ + +//! @cond IGNORED + +namespace cv { namespace cuda { namespace device +{ + enum ScanKind { EXCLUSIVE = 0, INCLUSIVE = 1 }; + + template struct WarpScan + { + __device__ __forceinline__ WarpScan() {} + __device__ __forceinline__ WarpScan(const WarpScan& other) { CV_UNUSED(other); } + + __device__ __forceinline__ T operator()( volatile T *ptr , const unsigned int idx) + { + const unsigned int lane = idx & 31; + F op; + + if ( lane >= 1) ptr [idx ] = op(ptr [idx - 1], ptr [idx]); + if ( lane >= 2) ptr [idx ] = op(ptr [idx - 2], ptr [idx]); + if ( lane >= 4) ptr [idx ] = op(ptr [idx - 4], ptr [idx]); + if ( lane >= 8) ptr [idx ] = op(ptr [idx - 8], ptr [idx]); + if ( lane >= 16) ptr [idx ] = op(ptr [idx - 16], ptr [idx]); + + if( Kind == INCLUSIVE ) + return ptr [idx]; + else + return (lane > 0) ? ptr [idx - 1] : 0; + } + + __device__ __forceinline__ unsigned int index(const unsigned int tid) + { + return tid; + } + + __device__ __forceinline__ void init(volatile T *ptr){} + + static const int warp_offset = 0; + + typedef WarpScan merge; + }; + + template struct WarpScanNoComp + { + __device__ __forceinline__ WarpScanNoComp() {} + __device__ __forceinline__ WarpScanNoComp(const WarpScanNoComp& other) { CV_UNUSED(other); } + + __device__ __forceinline__ T operator()( volatile T *ptr , const unsigned int idx) + { + const unsigned int lane = threadIdx.x & 31; + F op; + + ptr [idx ] = op(ptr [idx - 1], ptr [idx]); + ptr [idx ] = op(ptr [idx - 2], ptr [idx]); + ptr [idx ] = op(ptr [idx - 4], ptr [idx]); + ptr [idx ] = op(ptr [idx - 8], ptr [idx]); + ptr [idx ] = op(ptr [idx - 16], ptr [idx]); + + if( Kind == INCLUSIVE ) + return ptr [idx]; + else + return (lane > 0) ? ptr [idx - 1] : 0; + } + + __device__ __forceinline__ unsigned int index(const unsigned int tid) + { + return (tid >> warp_log) * warp_smem_stride + 16 + (tid & warp_mask); + } + + __device__ __forceinline__ void init(volatile T *ptr) + { + ptr[threadIdx.x] = 0; + } + + static const int warp_smem_stride = 32 + 16 + 1; + static const int warp_offset = 16; + static const int warp_log = 5; + static const int warp_mask = 31; + + typedef WarpScanNoComp merge; + }; + + template struct BlockScan + { + __device__ __forceinline__ BlockScan() {} + __device__ __forceinline__ BlockScan(const BlockScan& other) { CV_UNUSED(other); } + + __device__ __forceinline__ T operator()(volatile T *ptr) + { + const unsigned int tid = threadIdx.x; + const unsigned int lane = tid & warp_mask; + const unsigned int warp = tid >> warp_log; + + Sc scan; + typename Sc::merge merge_scan; + const unsigned int idx = scan.index(tid); + + T val = scan(ptr, idx); + __syncthreads (); + + if( warp == 0) + scan.init(ptr); + __syncthreads (); + + if( lane == 31 ) + ptr [scan.warp_offset + warp ] = (Kind == INCLUSIVE) ? val : ptr [idx]; + __syncthreads (); + + if( warp == 0 ) + merge_scan(ptr, idx); + __syncthreads(); + + if ( warp > 0) + val = ptr [scan.warp_offset + warp - 1] + val; + __syncthreads (); + + ptr[idx] = val; + __syncthreads (); + + return val ; + } + + static const int warp_log = 5; + static const int warp_mask = 31; + }; + + template + __device__ T warpScanInclusive(T idata, volatile T* s_Data, unsigned int tid) + { + #if __CUDA_ARCH__ >= 300 + const unsigned int laneId = cv::cuda::device::Warp::laneId(); + + // scan on shuffl functions + #pragma unroll + for (int i = 1; i <= (OPENCV_CUDA_WARP_SIZE / 2); i *= 2) + { + const T n = cv::cuda::device::shfl_up(idata, i); + if (laneId >= i) + idata += n; + } + + return idata; + #else + unsigned int pos = 2 * tid - (tid & (OPENCV_CUDA_WARP_SIZE - 1)); + s_Data[pos] = 0; + pos += OPENCV_CUDA_WARP_SIZE; + s_Data[pos] = idata; + + s_Data[pos] += s_Data[pos - 1]; + s_Data[pos] += s_Data[pos - 2]; + s_Data[pos] += s_Data[pos - 4]; + s_Data[pos] += s_Data[pos - 8]; + s_Data[pos] += s_Data[pos - 16]; + + return s_Data[pos]; + #endif + } + + template + __device__ __forceinline__ T warpScanExclusive(T idata, volatile T* s_Data, unsigned int tid) + { + return warpScanInclusive(idata, s_Data, tid) - idata; + } + + template + __device__ T blockScanInclusive(T idata, volatile T* s_Data, unsigned int tid) + { + if (tiNumScanThreads > OPENCV_CUDA_WARP_SIZE) + { + //Bottom-level inclusive warp scan + T warpResult = warpScanInclusive(idata, s_Data, tid); + + //Save top elements of each warp for exclusive warp scan + //sync to wait for warp scans to complete (because s_Data is being overwritten) + __syncthreads(); + if ((tid & (OPENCV_CUDA_WARP_SIZE - 1)) == (OPENCV_CUDA_WARP_SIZE - 1)) + { + s_Data[tid >> OPENCV_CUDA_LOG_WARP_SIZE] = warpResult; + } + + //wait for warp scans to complete + __syncthreads(); + + if (tid < (tiNumScanThreads / OPENCV_CUDA_WARP_SIZE) ) + { + //grab top warp elements + T val = s_Data[tid]; + //calculate exclusive scan and write back to shared memory + s_Data[tid] = warpScanExclusive(val, s_Data, tid); + } + + //return updated warp scans with exclusive scan results + __syncthreads(); + + return warpResult + s_Data[tid >> OPENCV_CUDA_LOG_WARP_SIZE]; + } + else + { + return warpScanInclusive(idata, s_Data, tid); + } + } +}}} + +//! @endcond + +#endif // OPENCV_CUDA_SCAN_HPP diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/cuda/simd_functions.hpp b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/cuda/simd_functions.hpp new file mode 100644 index 0000000..3d8c2e0 --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/cuda/simd_functions.hpp @@ -0,0 +1,869 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +/* + * Copyright (c) 2013 NVIDIA Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * Neither the name of NVIDIA Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef OPENCV_CUDA_SIMD_FUNCTIONS_HPP +#define OPENCV_CUDA_SIMD_FUNCTIONS_HPP + +#include "common.hpp" + +/** @file + * @deprecated Use @ref cudev instead. + */ + +//! @cond IGNORED + +namespace cv { namespace cuda { namespace device +{ + // 2 + + static __device__ __forceinline__ unsigned int vadd2(unsigned int a, unsigned int b) + { + unsigned int r = 0; + + #if __CUDA_ARCH__ >= 300 + asm("vadd2.u32.u32.u32.sat %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r)); + #elif __CUDA_ARCH__ >= 200 + asm("vadd.u32.u32.u32.sat %0.h0, %1.h0, %2.h0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r)); + asm("vadd.u32.u32.u32.sat %0.h1, %1.h1, %2.h1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r)); + #else + unsigned int s; + s = a ^ b; // sum bits + r = a + b; // actual sum + s = s ^ r; // determine carry-ins for each bit position + s = s & 0x00010000; // carry-in to high word (= carry-out from low word) + r = r - s; // subtract out carry-out from low word + #endif + + return r; + } + + static __device__ __forceinline__ unsigned int vsub2(unsigned int a, unsigned int b) + { + unsigned int r = 0; + + #if __CUDA_ARCH__ >= 300 + asm("vsub2.u32.u32.u32.sat %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r)); + #elif __CUDA_ARCH__ >= 200 + asm("vsub.u32.u32.u32.sat %0.h0, %1.h0, %2.h0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r)); + asm("vsub.u32.u32.u32.sat %0.h1, %1.h1, %2.h1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r)); + #else + unsigned int s; + s = a ^ b; // sum bits + r = a - b; // actual sum + s = s ^ r; // determine carry-ins for each bit position + s = s & 0x00010000; // borrow to high word + r = r + s; // compensate for borrow from low word + #endif + + return r; + } + + static __device__ __forceinline__ unsigned int vabsdiff2(unsigned int a, unsigned int b) + { + unsigned int r = 0; + + #if __CUDA_ARCH__ >= 300 + asm("vabsdiff2.u32.u32.u32.sat %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r)); + #elif __CUDA_ARCH__ >= 200 + asm("vabsdiff.u32.u32.u32.sat %0.h0, %1.h0, %2.h0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r)); + asm("vabsdiff.u32.u32.u32.sat %0.h1, %1.h1, %2.h1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r)); + #else + unsigned int s, t, u, v; + s = a & 0x0000ffff; // extract low halfword + r = b & 0x0000ffff; // extract low halfword + u = ::max(r, s); // maximum of low halfwords + v = ::min(r, s); // minimum of low halfwords + s = a & 0xffff0000; // extract high halfword + r = b & 0xffff0000; // extract high halfword + t = ::max(r, s); // maximum of high halfwords + s = ::min(r, s); // minimum of high halfwords + r = u | t; // maximum of both halfwords + s = v | s; // minimum of both halfwords + r = r - s; // |a - b| = max(a,b) - min(a,b); + #endif + + return r; + } + + static __device__ __forceinline__ unsigned int vavg2(unsigned int a, unsigned int b) + { + unsigned int r, s; + + // HAKMEM #23: a + b = 2 * (a & b) + (a ^ b) ==> + // (a + b) / 2 = (a & b) + ((a ^ b) >> 1) + s = a ^ b; + r = a & b; + s = s & 0xfffefffe; // ensure shift doesn't cross halfword boundaries + s = s >> 1; + s = r + s; + + return s; + } + + static __device__ __forceinline__ unsigned int vavrg2(unsigned int a, unsigned int b) + { + unsigned int r = 0; + + #if __CUDA_ARCH__ >= 300 + asm("vavrg2.u32.u32.u32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r)); + #else + // HAKMEM #23: a + b = 2 * (a | b) - (a ^ b) ==> + // (a + b + 1) / 2 = (a | b) - ((a ^ b) >> 1) + unsigned int s; + s = a ^ b; + r = a | b; + s = s & 0xfffefffe; // ensure shift doesn't cross half-word boundaries + s = s >> 1; + r = r - s; + #endif + + return r; + } + + static __device__ __forceinline__ unsigned int vseteq2(unsigned int a, unsigned int b) + { + unsigned int r = 0; + + #if __CUDA_ARCH__ >= 300 + asm("vset2.u32.u32.eq %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r)); + #else + // inspired by Alan Mycroft's null-byte detection algorithm: + // null_byte(x) = ((x - 0x01010101) & (~x & 0x80808080)) + unsigned int c; + r = a ^ b; // 0x0000 if a == b + c = r | 0x80008000; // set msbs, to catch carry out + r = r ^ c; // extract msbs, msb = 1 if r < 0x8000 + c = c - 0x00010001; // msb = 0, if r was 0x0000 or 0x8000 + c = r & ~c; // msb = 1, if r was 0x0000 + r = c >> 15; // convert to bool + #endif + + return r; + } + + static __device__ __forceinline__ unsigned int vcmpeq2(unsigned int a, unsigned int b) + { + unsigned int r, c; + + #if __CUDA_ARCH__ >= 300 + r = vseteq2(a, b); + c = r << 16; // convert bool + r = c - r; // into mask + #else + // inspired by Alan Mycroft's null-byte detection algorithm: + // null_byte(x) = ((x - 0x01010101) & (~x & 0x80808080)) + r = a ^ b; // 0x0000 if a == b + c = r | 0x80008000; // set msbs, to catch carry out + r = r ^ c; // extract msbs, msb = 1 if r < 0x8000 + c = c - 0x00010001; // msb = 0, if r was 0x0000 or 0x8000 + c = r & ~c; // msb = 1, if r was 0x0000 + r = c >> 15; // convert + r = c - r; // msbs to + r = c | r; // mask + #endif + + return r; + } + + static __device__ __forceinline__ unsigned int vsetge2(unsigned int a, unsigned int b) + { + unsigned int r = 0; + + #if __CUDA_ARCH__ >= 300 + asm("vset2.u32.u32.ge %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r)); + #else + unsigned int c; + asm("not.b32 %0, %0;" : "+r"(b)); + c = vavrg2(a, b); // (a + ~b + 1) / 2 = (a - b) / 2 + c = c & 0x80008000; // msb = carry-outs + r = c >> 15; // convert to bool + #endif + + return r; + } + + static __device__ __forceinline__ unsigned int vcmpge2(unsigned int a, unsigned int b) + { + unsigned int r, c; + + #if __CUDA_ARCH__ >= 300 + r = vsetge2(a, b); + c = r << 16; // convert bool + r = c - r; // into mask + #else + asm("not.b32 %0, %0;" : "+r"(b)); + c = vavrg2(a, b); // (a + ~b + 1) / 2 = (a - b) / 2 + c = c & 0x80008000; // msb = carry-outs + r = c >> 15; // convert + r = c - r; // msbs to + r = c | r; // mask + #endif + + return r; + } + + static __device__ __forceinline__ unsigned int vsetgt2(unsigned int a, unsigned int b) + { + unsigned int r = 0; + + #if __CUDA_ARCH__ >= 300 + asm("vset2.u32.u32.gt %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r)); + #else + unsigned int c; + asm("not.b32 %0, %0;" : "+r"(b)); + c = vavg2(a, b); // (a + ~b) / 2 = (a - b) / 2 [rounded down] + c = c & 0x80008000; // msbs = carry-outs + r = c >> 15; // convert to bool + #endif + + return r; + } + + static __device__ __forceinline__ unsigned int vcmpgt2(unsigned int a, unsigned int b) + { + unsigned int r, c; + + #if __CUDA_ARCH__ >= 300 + r = vsetgt2(a, b); + c = r << 16; // convert bool + r = c - r; // into mask + #else + asm("not.b32 %0, %0;" : "+r"(b)); + c = vavg2(a, b); // (a + ~b) / 2 = (a - b) / 2 [rounded down] + c = c & 0x80008000; // msbs = carry-outs + r = c >> 15; // convert + r = c - r; // msbs to + r = c | r; // mask + #endif + + return r; + } + + static __device__ __forceinline__ unsigned int vsetle2(unsigned int a, unsigned int b) + { + unsigned int r = 0; + + #if __CUDA_ARCH__ >= 300 + asm("vset2.u32.u32.le %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r)); + #else + unsigned int c; + asm("not.b32 %0, %0;" : "+r"(a)); + c = vavrg2(a, b); // (b + ~a + 1) / 2 = (b - a) / 2 + c = c & 0x80008000; // msb = carry-outs + r = c >> 15; // convert to bool + #endif + + return r; + } + + static __device__ __forceinline__ unsigned int vcmple2(unsigned int a, unsigned int b) + { + unsigned int r, c; + + #if __CUDA_ARCH__ >= 300 + r = vsetle2(a, b); + c = r << 16; // convert bool + r = c - r; // into mask + #else + asm("not.b32 %0, %0;" : "+r"(a)); + c = vavrg2(a, b); // (b + ~a + 1) / 2 = (b - a) / 2 + c = c & 0x80008000; // msb = carry-outs + r = c >> 15; // convert + r = c - r; // msbs to + r = c | r; // mask + #endif + + return r; + } + + static __device__ __forceinline__ unsigned int vsetlt2(unsigned int a, unsigned int b) + { + unsigned int r = 0; + + #if __CUDA_ARCH__ >= 300 + asm("vset2.u32.u32.lt %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r)); + #else + unsigned int c; + asm("not.b32 %0, %0;" : "+r"(a)); + c = vavg2(a, b); // (b + ~a) / 2 = (b - a) / 2 [rounded down] + c = c & 0x80008000; // msb = carry-outs + r = c >> 15; // convert to bool + #endif + + return r; + } + + static __device__ __forceinline__ unsigned int vcmplt2(unsigned int a, unsigned int b) + { + unsigned int r, c; + + #if __CUDA_ARCH__ >= 300 + r = vsetlt2(a, b); + c = r << 16; // convert bool + r = c - r; // into mask + #else + asm("not.b32 %0, %0;" : "+r"(a)); + c = vavg2(a, b); // (b + ~a) / 2 = (b - a) / 2 [rounded down] + c = c & 0x80008000; // msb = carry-outs + r = c >> 15; // convert + r = c - r; // msbs to + r = c | r; // mask + #endif + + return r; + } + + static __device__ __forceinline__ unsigned int vsetne2(unsigned int a, unsigned int b) + { + unsigned int r = 0; + + #if __CUDA_ARCH__ >= 300 + asm ("vset2.u32.u32.ne %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r)); + #else + // inspired by Alan Mycroft's null-byte detection algorithm: + // null_byte(x) = ((x - 0x01010101) & (~x & 0x80808080)) + unsigned int c; + r = a ^ b; // 0x0000 if a == b + c = r | 0x80008000; // set msbs, to catch carry out + c = c - 0x00010001; // msb = 0, if r was 0x0000 or 0x8000 + c = r | c; // msb = 1, if r was not 0x0000 + c = c & 0x80008000; // extract msbs + r = c >> 15; // convert to bool + #endif + + return r; + } + + static __device__ __forceinline__ unsigned int vcmpne2(unsigned int a, unsigned int b) + { + unsigned int r, c; + + #if __CUDA_ARCH__ >= 300 + r = vsetne2(a, b); + c = r << 16; // convert bool + r = c - r; // into mask + #else + // inspired by Alan Mycroft's null-byte detection algorithm: + // null_byte(x) = ((x - 0x01010101) & (~x & 0x80808080)) + r = a ^ b; // 0x0000 if a == b + c = r | 0x80008000; // set msbs, to catch carry out + c = c - 0x00010001; // msb = 0, if r was 0x0000 or 0x8000 + c = r | c; // msb = 1, if r was not 0x0000 + c = c & 0x80008000; // extract msbs + r = c >> 15; // convert + r = c - r; // msbs to + r = c | r; // mask + #endif + + return r; + } + + static __device__ __forceinline__ unsigned int vmax2(unsigned int a, unsigned int b) + { + unsigned int r = 0; + + #if __CUDA_ARCH__ >= 300 + asm("vmax2.u32.u32.u32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r)); + #elif __CUDA_ARCH__ >= 200 + asm("vmax.u32.u32.u32 %0.h0, %1.h0, %2.h0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r)); + asm("vmax.u32.u32.u32 %0.h1, %1.h1, %2.h1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r)); + #else + unsigned int s, t, u; + r = a & 0x0000ffff; // extract low halfword + s = b & 0x0000ffff; // extract low halfword + t = ::max(r, s); // maximum of low halfwords + r = a & 0xffff0000; // extract high halfword + s = b & 0xffff0000; // extract high halfword + u = ::max(r, s); // maximum of high halfwords + r = t | u; // combine halfword maximums + #endif + + return r; + } + + static __device__ __forceinline__ unsigned int vmin2(unsigned int a, unsigned int b) + { + unsigned int r = 0; + + #if __CUDA_ARCH__ >= 300 + asm("vmin2.u32.u32.u32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r)); + #elif __CUDA_ARCH__ >= 200 + asm("vmin.u32.u32.u32 %0.h0, %1.h0, %2.h0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r)); + asm("vmin.u32.u32.u32 %0.h1, %1.h1, %2.h1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r)); + #else + unsigned int s, t, u; + r = a & 0x0000ffff; // extract low halfword + s = b & 0x0000ffff; // extract low halfword + t = ::min(r, s); // minimum of low halfwords + r = a & 0xffff0000; // extract high halfword + s = b & 0xffff0000; // extract high halfword + u = ::min(r, s); // minimum of high halfwords + r = t | u; // combine halfword minimums + #endif + + return r; + } + + // 4 + + static __device__ __forceinline__ unsigned int vadd4(unsigned int a, unsigned int b) + { + unsigned int r = 0; + + #if __CUDA_ARCH__ >= 300 + asm("vadd4.u32.u32.u32.sat %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r)); + #elif __CUDA_ARCH__ >= 200 + asm("vadd.u32.u32.u32.sat %0.b0, %1.b0, %2.b0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r)); + asm("vadd.u32.u32.u32.sat %0.b1, %1.b1, %2.b1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r)); + asm("vadd.u32.u32.u32.sat %0.b2, %1.b2, %2.b2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r)); + asm("vadd.u32.u32.u32.sat %0.b3, %1.b3, %2.b3, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r)); + #else + unsigned int s, t; + s = a ^ b; // sum bits + r = a & 0x7f7f7f7f; // clear msbs + t = b & 0x7f7f7f7f; // clear msbs + s = s & 0x80808080; // msb sum bits + r = r + t; // add without msbs, record carry-out in msbs + r = r ^ s; // sum of msb sum and carry-in bits, w/o carry-out + #endif /* __CUDA_ARCH__ >= 300 */ + + return r; + } + + static __device__ __forceinline__ unsigned int vsub4(unsigned int a, unsigned int b) + { + unsigned int r = 0; + + #if __CUDA_ARCH__ >= 300 + asm("vsub4.u32.u32.u32.sat %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r)); + #elif __CUDA_ARCH__ >= 200 + asm("vsub.u32.u32.u32.sat %0.b0, %1.b0, %2.b0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r)); + asm("vsub.u32.u32.u32.sat %0.b1, %1.b1, %2.b1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r)); + asm("vsub.u32.u32.u32.sat %0.b2, %1.b2, %2.b2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r)); + asm("vsub.u32.u32.u32.sat %0.b3, %1.b3, %2.b3, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r)); + #else + unsigned int s, t; + s = a ^ ~b; // inverted sum bits + r = a | 0x80808080; // set msbs + t = b & 0x7f7f7f7f; // clear msbs + s = s & 0x80808080; // inverted msb sum bits + r = r - t; // subtract w/o msbs, record inverted borrows in msb + r = r ^ s; // combine inverted msb sum bits and borrows + #endif + + return r; + } + + static __device__ __forceinline__ unsigned int vavg4(unsigned int a, unsigned int b) + { + unsigned int r, s; + + // HAKMEM #23: a + b = 2 * (a & b) + (a ^ b) ==> + // (a + b) / 2 = (a & b) + ((a ^ b) >> 1) + s = a ^ b; + r = a & b; + s = s & 0xfefefefe; // ensure following shift doesn't cross byte boundaries + s = s >> 1; + s = r + s; + + return s; + } + + static __device__ __forceinline__ unsigned int vavrg4(unsigned int a, unsigned int b) + { + unsigned int r = 0; + + #if __CUDA_ARCH__ >= 300 + asm("vavrg4.u32.u32.u32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r)); + #else + // HAKMEM #23: a + b = 2 * (a | b) - (a ^ b) ==> + // (a + b + 1) / 2 = (a | b) - ((a ^ b) >> 1) + unsigned int c; + c = a ^ b; + r = a | b; + c = c & 0xfefefefe; // ensure following shift doesn't cross byte boundaries + c = c >> 1; + r = r - c; + #endif + + return r; + } + + static __device__ __forceinline__ unsigned int vseteq4(unsigned int a, unsigned int b) + { + unsigned int r = 0; + + #if __CUDA_ARCH__ >= 300 + asm("vset4.u32.u32.eq %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r)); + #else + // inspired by Alan Mycroft's null-byte detection algorithm: + // null_byte(x) = ((x - 0x01010101) & (~x & 0x80808080)) + unsigned int c; + r = a ^ b; // 0x00 if a == b + c = r | 0x80808080; // set msbs, to catch carry out + r = r ^ c; // extract msbs, msb = 1 if r < 0x80 + c = c - 0x01010101; // msb = 0, if r was 0x00 or 0x80 + c = r & ~c; // msb = 1, if r was 0x00 + r = c >> 7; // convert to bool + #endif + + return r; + } + + static __device__ __forceinline__ unsigned int vcmpeq4(unsigned int a, unsigned int b) + { + unsigned int r, t; + + #if __CUDA_ARCH__ >= 300 + r = vseteq4(a, b); + t = r << 8; // convert bool + r = t - r; // to mask + #else + // inspired by Alan Mycroft's null-byte detection algorithm: + // null_byte(x) = ((x - 0x01010101) & (~x & 0x80808080)) + t = a ^ b; // 0x00 if a == b + r = t | 0x80808080; // set msbs, to catch carry out + t = t ^ r; // extract msbs, msb = 1 if t < 0x80 + r = r - 0x01010101; // msb = 0, if t was 0x00 or 0x80 + r = t & ~r; // msb = 1, if t was 0x00 + t = r >> 7; // build mask + t = r - t; // from + r = t | r; // msbs + #endif + + return r; + } + + static __device__ __forceinline__ unsigned int vsetle4(unsigned int a, unsigned int b) + { + unsigned int r = 0; + + #if __CUDA_ARCH__ >= 300 + asm("vset4.u32.u32.le %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r)); + #else + unsigned int c; + asm("not.b32 %0, %0;" : "+r"(a)); + c = vavrg4(a, b); // (b + ~a + 1) / 2 = (b - a) / 2 + c = c & 0x80808080; // msb = carry-outs + r = c >> 7; // convert to bool + #endif + + return r; + } + + static __device__ __forceinline__ unsigned int vcmple4(unsigned int a, unsigned int b) + { + unsigned int r, c; + + #if __CUDA_ARCH__ >= 300 + r = vsetle4(a, b); + c = r << 8; // convert bool + r = c - r; // to mask + #else + asm("not.b32 %0, %0;" : "+r"(a)); + c = vavrg4(a, b); // (b + ~a + 1) / 2 = (b - a) / 2 + c = c & 0x80808080; // msbs = carry-outs + r = c >> 7; // convert + r = c - r; // msbs to + r = c | r; // mask + #endif + + return r; + } + + static __device__ __forceinline__ unsigned int vsetlt4(unsigned int a, unsigned int b) + { + unsigned int r = 0; + + #if __CUDA_ARCH__ >= 300 + asm("vset4.u32.u32.lt %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r)); + #else + unsigned int c; + asm("not.b32 %0, %0;" : "+r"(a)); + c = vavg4(a, b); // (b + ~a) / 2 = (b - a) / 2 [rounded down] + c = c & 0x80808080; // msb = carry-outs + r = c >> 7; // convert to bool + #endif + + return r; + } + + static __device__ __forceinline__ unsigned int vcmplt4(unsigned int a, unsigned int b) + { + unsigned int r, c; + + #if __CUDA_ARCH__ >= 300 + r = vsetlt4(a, b); + c = r << 8; // convert bool + r = c - r; // to mask + #else + asm("not.b32 %0, %0;" : "+r"(a)); + c = vavg4(a, b); // (b + ~a) / 2 = (b - a) / 2 [rounded down] + c = c & 0x80808080; // msbs = carry-outs + r = c >> 7; // convert + r = c - r; // msbs to + r = c | r; // mask + #endif + + return r; + } + + static __device__ __forceinline__ unsigned int vsetge4(unsigned int a, unsigned int b) + { + unsigned int r = 0; + + #if __CUDA_ARCH__ >= 300 + asm("vset4.u32.u32.ge %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r)); + #else + unsigned int c; + asm("not.b32 %0, %0;" : "+r"(b)); + c = vavrg4(a, b); // (a + ~b + 1) / 2 = (a - b) / 2 + c = c & 0x80808080; // msb = carry-outs + r = c >> 7; // convert to bool + #endif + + return r; + } + + static __device__ __forceinline__ unsigned int vcmpge4(unsigned int a, unsigned int b) + { + unsigned int r, s; + + #if __CUDA_ARCH__ >= 300 + r = vsetge4(a, b); + s = r << 8; // convert bool + r = s - r; // to mask + #else + asm ("not.b32 %0,%0;" : "+r"(b)); + r = vavrg4 (a, b); // (a + ~b + 1) / 2 = (a - b) / 2 + r = r & 0x80808080; // msb = carry-outs + s = r >> 7; // build mask + s = r - s; // from + r = s | r; // msbs + #endif + + return r; + } + + static __device__ __forceinline__ unsigned int vsetgt4(unsigned int a, unsigned int b) + { + unsigned int r = 0; + + #if __CUDA_ARCH__ >= 300 + asm("vset4.u32.u32.gt %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r)); + #else + unsigned int c; + asm("not.b32 %0, %0;" : "+r"(b)); + c = vavg4(a, b); // (a + ~b) / 2 = (a - b) / 2 [rounded down] + c = c & 0x80808080; // msb = carry-outs + r = c >> 7; // convert to bool + #endif + + return r; + } + + static __device__ __forceinline__ unsigned int vcmpgt4(unsigned int a, unsigned int b) + { + unsigned int r, c; + + #if __CUDA_ARCH__ >= 300 + r = vsetgt4(a, b); + c = r << 8; // convert bool + r = c - r; // to mask + #else + asm("not.b32 %0, %0;" : "+r"(b)); + c = vavg4(a, b); // (a + ~b) / 2 = (a - b) / 2 [rounded down] + c = c & 0x80808080; // msb = carry-outs + r = c >> 7; // convert + r = c - r; // msbs to + r = c | r; // mask + #endif + + return r; + } + + static __device__ __forceinline__ unsigned int vsetne4(unsigned int a, unsigned int b) + { + unsigned int r = 0; + + #if __CUDA_ARCH__ >= 300 + asm("vset4.u32.u32.ne %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r)); + #else + // inspired by Alan Mycroft's null-byte detection algorithm: + // null_byte(x) = ((x - 0x01010101) & (~x & 0x80808080)) + unsigned int c; + r = a ^ b; // 0x00 if a == b + c = r | 0x80808080; // set msbs, to catch carry out + c = c - 0x01010101; // msb = 0, if r was 0x00 or 0x80 + c = r | c; // msb = 1, if r was not 0x00 + c = c & 0x80808080; // extract msbs + r = c >> 7; // convert to bool + #endif + + return r; + } + + static __device__ __forceinline__ unsigned int vcmpne4(unsigned int a, unsigned int b) + { + unsigned int r, c; + + #if __CUDA_ARCH__ >= 300 + r = vsetne4(a, b); + c = r << 8; // convert bool + r = c - r; // to mask + #else + // inspired by Alan Mycroft's null-byte detection algorithm: + // null_byte(x) = ((x - 0x01010101) & (~x & 0x80808080)) + r = a ^ b; // 0x00 if a == b + c = r | 0x80808080; // set msbs, to catch carry out + c = c - 0x01010101; // msb = 0, if r was 0x00 or 0x80 + c = r | c; // msb = 1, if r was not 0x00 + c = c & 0x80808080; // extract msbs + r = c >> 7; // convert + r = c - r; // msbs to + r = c | r; // mask + #endif + + return r; + } + + static __device__ __forceinline__ unsigned int vabsdiff4(unsigned int a, unsigned int b) + { + unsigned int r = 0; + + #if __CUDA_ARCH__ >= 300 + asm("vabsdiff4.u32.u32.u32.sat %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r)); + #elif __CUDA_ARCH__ >= 200 + asm("vabsdiff.u32.u32.u32.sat %0.b0, %1.b0, %2.b0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r)); + asm("vabsdiff.u32.u32.u32.sat %0.b1, %1.b1, %2.b1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r)); + asm("vabsdiff.u32.u32.u32.sat %0.b2, %1.b2, %2.b2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r)); + asm("vabsdiff.u32.u32.u32.sat %0.b3, %1.b3, %2.b3, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r)); + #else + unsigned int s; + s = vcmpge4(a, b); // mask = 0xff if a >= b + r = a ^ b; // + s = (r & s) ^ b; // select a when a >= b, else select b => max(a,b) + r = s ^ r; // select a when b >= a, else select b => min(a,b) + r = s - r; // |a - b| = max(a,b) - min(a,b); + #endif + + return r; + } + + static __device__ __forceinline__ unsigned int vmax4(unsigned int a, unsigned int b) + { + unsigned int r = 0; + + #if __CUDA_ARCH__ >= 300 + asm("vmax4.u32.u32.u32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r)); + #elif __CUDA_ARCH__ >= 200 + asm("vmax.u32.u32.u32 %0.b0, %1.b0, %2.b0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r)); + asm("vmax.u32.u32.u32 %0.b1, %1.b1, %2.b1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r)); + asm("vmax.u32.u32.u32 %0.b2, %1.b2, %2.b2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r)); + asm("vmax.u32.u32.u32 %0.b3, %1.b3, %2.b3, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r)); + #else + unsigned int s; + s = vcmpge4(a, b); // mask = 0xff if a >= b + r = a & s; // select a when b >= a + s = b & ~s; // select b when b < a + r = r | s; // combine byte selections + #endif + + return r; // byte-wise unsigned maximum + } + + static __device__ __forceinline__ unsigned int vmin4(unsigned int a, unsigned int b) + { + unsigned int r = 0; + + #if __CUDA_ARCH__ >= 300 + asm("vmin4.u32.u32.u32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r)); + #elif __CUDA_ARCH__ >= 200 + asm("vmin.u32.u32.u32 %0.b0, %1.b0, %2.b0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r)); + asm("vmin.u32.u32.u32 %0.b1, %1.b1, %2.b1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r)); + asm("vmin.u32.u32.u32 %0.b2, %1.b2, %2.b2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r)); + asm("vmin.u32.u32.u32 %0.b3, %1.b3, %2.b3, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r)); + #else + unsigned int s; + s = vcmpge4(b, a); // mask = 0xff if a >= b + r = a & s; // select a when b >= a + s = b & ~s; // select b when b < a + r = r | s; // combine byte selections + #endif + + return r; + } +}}} + +//! @endcond + +#endif // OPENCV_CUDA_SIMD_FUNCTIONS_HPP diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/cuda/transform.hpp b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/cuda/transform.hpp new file mode 100644 index 0000000..42aa6ea --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/cuda/transform.hpp @@ -0,0 +1,75 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef OPENCV_CUDA_TRANSFORM_HPP +#define OPENCV_CUDA_TRANSFORM_HPP + +#include "common.hpp" +#include "utility.hpp" +#include "detail/transform_detail.hpp" + +/** @file + * @deprecated Use @ref cudev instead. + */ + +//! @cond IGNORED + +namespace cv { namespace cuda { namespace device +{ + template + static inline void transform(PtrStepSz src, PtrStepSz dst, UnOp op, const Mask& mask, cudaStream_t stream) + { + typedef TransformFunctorTraits ft; + transform_detail::TransformDispatcher::cn == 1 && VecTraits::cn == 1 && ft::smart_shift != 1>::call(src, dst, op, mask, stream); + } + + template + static inline void transform(PtrStepSz src1, PtrStepSz src2, PtrStepSz dst, BinOp op, const Mask& mask, cudaStream_t stream) + { + typedef TransformFunctorTraits ft; + transform_detail::TransformDispatcher::cn == 1 && VecTraits::cn == 1 && VecTraits::cn == 1 && ft::smart_shift != 1>::call(src1, src2, dst, op, mask, stream); + } +}}} + +//! @endcond + +#endif // OPENCV_CUDA_TRANSFORM_HPP diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/cuda/type_traits.hpp b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/cuda/type_traits.hpp new file mode 100644 index 0000000..8b7a3fd --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/cuda/type_traits.hpp @@ -0,0 +1,90 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef OPENCV_CUDA_TYPE_TRAITS_HPP +#define OPENCV_CUDA_TYPE_TRAITS_HPP + +#include "detail/type_traits_detail.hpp" + +/** @file + * @deprecated Use @ref cudev instead. + */ + +//! @cond IGNORED + +namespace cv { namespace cuda { namespace device +{ + template struct IsSimpleParameter + { + enum {value = type_traits_detail::IsIntegral::value || type_traits_detail::IsFloat::value || + type_traits_detail::PointerTraits::type>::value}; + }; + + template struct TypeTraits + { + typedef typename type_traits_detail::UnConst::type NonConstType; + typedef typename type_traits_detail::UnVolatile::type NonVolatileType; + typedef typename type_traits_detail::UnVolatile::type>::type UnqualifiedType; + typedef typename type_traits_detail::PointerTraits::type PointeeType; + typedef typename type_traits_detail::ReferenceTraits::type ReferredType; + + enum { isConst = type_traits_detail::UnConst::value }; + enum { isVolatile = type_traits_detail::UnVolatile::value }; + + enum { isReference = type_traits_detail::ReferenceTraits::value }; + enum { isPointer = type_traits_detail::PointerTraits::type>::value }; + + enum { isUnsignedInt = type_traits_detail::IsUnsignedIntegral::value }; + enum { isSignedInt = type_traits_detail::IsSignedIntergral::value }; + enum { isIntegral = type_traits_detail::IsIntegral::value }; + enum { isFloat = type_traits_detail::IsFloat::value }; + enum { isArith = isIntegral || isFloat }; + enum { isVec = type_traits_detail::IsVec::value }; + + typedef typename type_traits_detail::Select::value, + T, typename type_traits_detail::AddParameterType::type>::type ParameterType; + }; +}}} + +//! @endcond + +#endif // OPENCV_CUDA_TYPE_TRAITS_HPP diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/cuda/utility.hpp b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/cuda/utility.hpp new file mode 100644 index 0000000..7f5db48 --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/cuda/utility.hpp @@ -0,0 +1,230 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef OPENCV_CUDA_UTILITY_HPP +#define OPENCV_CUDA_UTILITY_HPP + +#include "saturate_cast.hpp" +#include "datamov_utils.hpp" + +/** @file + * @deprecated Use @ref cudev instead. + */ + +//! @cond IGNORED + +namespace cv { namespace cuda { namespace device +{ + struct CV_EXPORTS ThrustAllocator + { + typedef uchar value_type; + virtual ~ThrustAllocator(); + virtual __device__ __host__ uchar* allocate(size_t numBytes) = 0; + virtual __device__ __host__ void deallocate(uchar* ptr, size_t numBytes) = 0; + static ThrustAllocator& getAllocator(); + static void setAllocator(ThrustAllocator* allocator); + }; + #define OPENCV_CUDA_LOG_WARP_SIZE (5) + #define OPENCV_CUDA_WARP_SIZE (1 << OPENCV_CUDA_LOG_WARP_SIZE) + #define OPENCV_CUDA_LOG_MEM_BANKS ((__CUDA_ARCH__ >= 200) ? 5 : 4) // 32 banks on fermi, 16 on tesla + #define OPENCV_CUDA_MEM_BANKS (1 << OPENCV_CUDA_LOG_MEM_BANKS) + + /////////////////////////////////////////////////////////////////////////////// + // swap + + template void __device__ __host__ __forceinline__ swap(T& a, T& b) + { + const T temp = a; + a = b; + b = temp; + } + + /////////////////////////////////////////////////////////////////////////////// + // Mask Reader + + struct SingleMask + { + explicit __host__ __device__ __forceinline__ SingleMask(PtrStepb mask_) : mask(mask_) {} + __host__ __device__ __forceinline__ SingleMask(const SingleMask& mask_): mask(mask_.mask){} + + __device__ __forceinline__ bool operator()(int y, int x) const + { + return mask.ptr(y)[x] != 0; + } + + PtrStepb mask; + }; + + struct SingleMaskChannels + { + __host__ __device__ __forceinline__ SingleMaskChannels(PtrStepb mask_, int channels_) + : mask(mask_), channels(channels_) {} + __host__ __device__ __forceinline__ SingleMaskChannels(const SingleMaskChannels& mask_) + :mask(mask_.mask), channels(mask_.channels){} + + __device__ __forceinline__ bool operator()(int y, int x) const + { + return mask.ptr(y)[x / channels] != 0; + } + + PtrStepb mask; + int channels; + }; + + struct MaskCollection + { + explicit __host__ __device__ __forceinline__ MaskCollection(PtrStepb* maskCollection_) + : maskCollection(maskCollection_) {} + + __device__ __forceinline__ MaskCollection(const MaskCollection& masks_) + : maskCollection(masks_.maskCollection), curMask(masks_.curMask){} + + __device__ __forceinline__ void next() + { + curMask = *maskCollection++; + } + __device__ __forceinline__ void setMask(int z) + { + curMask = maskCollection[z]; + } + + __device__ __forceinline__ bool operator()(int y, int x) const + { + uchar val; + return curMask.data == 0 || (ForceGlob::Load(curMask.ptr(y), x, val), (val != 0)); + } + + const PtrStepb* maskCollection; + PtrStepb curMask; + }; + + struct WithOutMask + { + __host__ __device__ __forceinline__ WithOutMask(){} + __host__ __device__ __forceinline__ WithOutMask(const WithOutMask&){} + + __device__ __forceinline__ void next() const + { + } + __device__ __forceinline__ void setMask(int) const + { + } + + __device__ __forceinline__ bool operator()(int, int) const + { + return true; + } + + __device__ __forceinline__ bool operator()(int, int, int) const + { + return true; + } + + static __device__ __forceinline__ bool check(int, int) + { + return true; + } + + static __device__ __forceinline__ bool check(int, int, int) + { + return true; + } + }; + + /////////////////////////////////////////////////////////////////////////////// + // Solve linear system + + // solve 2x2 linear system Ax=b + template __device__ __forceinline__ bool solve2x2(const T A[2][2], const T b[2], T x[2]) + { + T det = A[0][0] * A[1][1] - A[1][0] * A[0][1]; + + if (det != 0) + { + double invdet = 1.0 / det; + + x[0] = saturate_cast(invdet * (b[0] * A[1][1] - b[1] * A[0][1])); + + x[1] = saturate_cast(invdet * (A[0][0] * b[1] - A[1][0] * b[0])); + + return true; + } + + return false; + } + + // solve 3x3 linear system Ax=b + template __device__ __forceinline__ bool solve3x3(const T A[3][3], const T b[3], T x[3]) + { + T det = A[0][0] * (A[1][1] * A[2][2] - A[1][2] * A[2][1]) + - A[0][1] * (A[1][0] * A[2][2] - A[1][2] * A[2][0]) + + A[0][2] * (A[1][0] * A[2][1] - A[1][1] * A[2][0]); + + if (det != 0) + { + double invdet = 1.0 / det; + + x[0] = saturate_cast(invdet * + (b[0] * (A[1][1] * A[2][2] - A[1][2] * A[2][1]) - + A[0][1] * (b[1] * A[2][2] - A[1][2] * b[2] ) + + A[0][2] * (b[1] * A[2][1] - A[1][1] * b[2] ))); + + x[1] = saturate_cast(invdet * + (A[0][0] * (b[1] * A[2][2] - A[1][2] * b[2] ) - + b[0] * (A[1][0] * A[2][2] - A[1][2] * A[2][0]) + + A[0][2] * (A[1][0] * b[2] - b[1] * A[2][0]))); + + x[2] = saturate_cast(invdet * + (A[0][0] * (A[1][1] * b[2] - b[1] * A[2][1]) - + A[0][1] * (A[1][0] * b[2] - b[1] * A[2][0]) + + b[0] * (A[1][0] * A[2][1] - A[1][1] * A[2][0]))); + + return true; + } + + return false; + } +}}} // namespace cv { namespace cuda { namespace cudev + +//! @endcond + +#endif // OPENCV_CUDA_UTILITY_HPP diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/cuda/vec_distance.hpp b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/cuda/vec_distance.hpp new file mode 100644 index 0000000..ef6e510 --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/cuda/vec_distance.hpp @@ -0,0 +1,232 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef OPENCV_CUDA_VEC_DISTANCE_HPP +#define OPENCV_CUDA_VEC_DISTANCE_HPP + +#include "reduce.hpp" +#include "functional.hpp" +#include "detail/vec_distance_detail.hpp" + +/** @file + * @deprecated Use @ref cudev instead. + */ + +//! @cond IGNORED + +namespace cv { namespace cuda { namespace device +{ + template struct L1Dist + { + typedef int value_type; + typedef int result_type; + + __device__ __forceinline__ L1Dist() : mySum(0) {} + + __device__ __forceinline__ void reduceIter(int val1, int val2) + { + mySum = __sad(val1, val2, mySum); + } + + template __device__ __forceinline__ void reduceAll(int* smem, int tid) + { + reduce(smem, mySum, tid, plus()); + } + + __device__ __forceinline__ operator int() const + { + return mySum; + } + + int mySum; + }; + template <> struct L1Dist + { + typedef float value_type; + typedef float result_type; + + __device__ __forceinline__ L1Dist() : mySum(0.0f) {} + + __device__ __forceinline__ void reduceIter(float val1, float val2) + { + mySum += ::fabs(val1 - val2); + } + + template __device__ __forceinline__ void reduceAll(float* smem, int tid) + { + reduce(smem, mySum, tid, plus()); + } + + __device__ __forceinline__ operator float() const + { + return mySum; + } + + float mySum; + }; + + struct L2Dist + { + typedef float value_type; + typedef float result_type; + + __device__ __forceinline__ L2Dist() : mySum(0.0f) {} + + __device__ __forceinline__ void reduceIter(float val1, float val2) + { + float reg = val1 - val2; + mySum += reg * reg; + } + + template __device__ __forceinline__ void reduceAll(float* smem, int tid) + { + reduce(smem, mySum, tid, plus()); + } + + __device__ __forceinline__ operator float() const + { + return sqrtf(mySum); + } + + float mySum; + }; + + struct HammingDist + { + typedef int value_type; + typedef int result_type; + + __device__ __forceinline__ HammingDist() : mySum(0) {} + + __device__ __forceinline__ void reduceIter(int val1, int val2) + { + mySum += __popc(val1 ^ val2); + } + + template __device__ __forceinline__ void reduceAll(int* smem, int tid) + { + reduce(smem, mySum, tid, plus()); + } + + __device__ __forceinline__ operator int() const + { + return mySum; + } + + int mySum; + }; + + // calc distance between two vectors in global memory + template + __device__ void calcVecDiffGlobal(const T1* vec1, const T2* vec2, int len, Dist& dist, typename Dist::result_type* smem, int tid) + { + for (int i = tid; i < len; i += THREAD_DIM) + { + T1 val1; + ForceGlob::Load(vec1, i, val1); + + T2 val2; + ForceGlob::Load(vec2, i, val2); + + dist.reduceIter(val1, val2); + } + + dist.reduceAll(smem, tid); + } + + // calc distance between two vectors, first vector is cached in register or shared memory, second vector is in global memory + template + __device__ __forceinline__ void calcVecDiffCached(const T1* vecCached, const T2* vecGlob, int len, Dist& dist, typename Dist::result_type* smem, int tid) + { + vec_distance_detail::VecDiffCachedCalculator::calc(vecCached, vecGlob, len, dist, tid); + + dist.reduceAll(smem, tid); + } + + // calc distance between two vectors in global memory + template struct VecDiffGlobal + { + explicit __device__ __forceinline__ VecDiffGlobal(const T1* vec1_, int = 0, void* = 0, int = 0, int = 0) + { + vec1 = vec1_; + } + + template + __device__ __forceinline__ void calc(const T2* vec2, int len, Dist& dist, typename Dist::result_type* smem, int tid) const + { + calcVecDiffGlobal(vec1, vec2, len, dist, smem, tid); + } + + const T1* vec1; + }; + + // calc distance between two vectors, first vector is cached in register memory, second vector is in global memory + template struct VecDiffCachedRegister + { + template __device__ __forceinline__ VecDiffCachedRegister(const T1* vec1, int len, U* smem, int glob_tid, int tid) + { + if (glob_tid < len) + smem[glob_tid] = vec1[glob_tid]; + __syncthreads(); + + U* vec1ValsPtr = vec1Vals; + + #pragma unroll + for (int i = tid; i < MAX_LEN; i += THREAD_DIM) + *vec1ValsPtr++ = smem[i]; + + __syncthreads(); + } + + template + __device__ __forceinline__ void calc(const T2* vec2, int len, Dist& dist, typename Dist::result_type* smem, int tid) const + { + calcVecDiffCached(vec1Vals, vec2, len, dist, smem, tid); + } + + U vec1Vals[MAX_LEN / THREAD_DIM]; + }; +}}} // namespace cv { namespace cuda { namespace cudev + +//! @endcond + +#endif // OPENCV_CUDA_VEC_DISTANCE_HPP diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/cuda/vec_math.hpp b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/cuda/vec_math.hpp new file mode 100644 index 0000000..80b1303 --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/cuda/vec_math.hpp @@ -0,0 +1,923 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef OPENCV_CUDA_VECMATH_HPP +#define OPENCV_CUDA_VECMATH_HPP + +#include "vec_traits.hpp" +#include "saturate_cast.hpp" + +/** @file + * @deprecated Use @ref cudev instead. + */ + +//! @cond IGNORED + +namespace cv { namespace cuda { namespace device +{ + +// saturate_cast + +namespace vec_math_detail +{ + template struct SatCastHelper; + template struct SatCastHelper<1, VecD> + { + template static __device__ __forceinline__ VecD cast(const VecS& v) + { + typedef typename VecTraits::elem_type D; + return VecTraits::make(saturate_cast(v.x)); + } + }; + template struct SatCastHelper<2, VecD> + { + template static __device__ __forceinline__ VecD cast(const VecS& v) + { + typedef typename VecTraits::elem_type D; + return VecTraits::make(saturate_cast(v.x), saturate_cast(v.y)); + } + }; + template struct SatCastHelper<3, VecD> + { + template static __device__ __forceinline__ VecD cast(const VecS& v) + { + typedef typename VecTraits::elem_type D; + return VecTraits::make(saturate_cast(v.x), saturate_cast(v.y), saturate_cast(v.z)); + } + }; + template struct SatCastHelper<4, VecD> + { + template static __device__ __forceinline__ VecD cast(const VecS& v) + { + typedef typename VecTraits::elem_type D; + return VecTraits::make(saturate_cast(v.x), saturate_cast(v.y), saturate_cast(v.z), saturate_cast(v.w)); + } + }; + + template static __device__ __forceinline__ VecD saturate_cast_helper(const VecS& v) + { + return SatCastHelper::cn, VecD>::cast(v); + } +} + +template static __device__ __forceinline__ T saturate_cast(const uchar1& v) {return vec_math_detail::saturate_cast_helper(v);} +template static __device__ __forceinline__ T saturate_cast(const char1& v) {return vec_math_detail::saturate_cast_helper(v);} +template static __device__ __forceinline__ T saturate_cast(const ushort1& v) {return vec_math_detail::saturate_cast_helper(v);} +template static __device__ __forceinline__ T saturate_cast(const short1& v) {return vec_math_detail::saturate_cast_helper(v);} +template static __device__ __forceinline__ T saturate_cast(const uint1& v) {return vec_math_detail::saturate_cast_helper(v);} +template static __device__ __forceinline__ T saturate_cast(const int1& v) {return vec_math_detail::saturate_cast_helper(v);} +template static __device__ __forceinline__ T saturate_cast(const float1& v) {return vec_math_detail::saturate_cast_helper(v);} +template static __device__ __forceinline__ T saturate_cast(const double1& v) {return vec_math_detail::saturate_cast_helper(v);} + +template static __device__ __forceinline__ T saturate_cast(const uchar2& v) {return vec_math_detail::saturate_cast_helper(v);} +template static __device__ __forceinline__ T saturate_cast(const char2& v) {return vec_math_detail::saturate_cast_helper(v);} +template static __device__ __forceinline__ T saturate_cast(const ushort2& v) {return vec_math_detail::saturate_cast_helper(v);} +template static __device__ __forceinline__ T saturate_cast(const short2& v) {return vec_math_detail::saturate_cast_helper(v);} +template static __device__ __forceinline__ T saturate_cast(const uint2& v) {return vec_math_detail::saturate_cast_helper(v);} +template static __device__ __forceinline__ T saturate_cast(const int2& v) {return vec_math_detail::saturate_cast_helper(v);} +template static __device__ __forceinline__ T saturate_cast(const float2& v) {return vec_math_detail::saturate_cast_helper(v);} +template static __device__ __forceinline__ T saturate_cast(const double2& v) {return vec_math_detail::saturate_cast_helper(v);} + +template static __device__ __forceinline__ T saturate_cast(const uchar3& v) {return vec_math_detail::saturate_cast_helper(v);} +template static __device__ __forceinline__ T saturate_cast(const char3& v) {return vec_math_detail::saturate_cast_helper(v);} +template static __device__ __forceinline__ T saturate_cast(const ushort3& v) {return vec_math_detail::saturate_cast_helper(v);} +template static __device__ __forceinline__ T saturate_cast(const short3& v) {return vec_math_detail::saturate_cast_helper(v);} +template static __device__ __forceinline__ T saturate_cast(const uint3& v) {return vec_math_detail::saturate_cast_helper(v);} +template static __device__ __forceinline__ T saturate_cast(const int3& v) {return vec_math_detail::saturate_cast_helper(v);} +template static __device__ __forceinline__ T saturate_cast(const float3& v) {return vec_math_detail::saturate_cast_helper(v);} +template static __device__ __forceinline__ T saturate_cast(const double3& v) {return vec_math_detail::saturate_cast_helper(v);} + +template static __device__ __forceinline__ T saturate_cast(const uchar4& v) {return vec_math_detail::saturate_cast_helper(v);} +template static __device__ __forceinline__ T saturate_cast(const char4& v) {return vec_math_detail::saturate_cast_helper(v);} +template static __device__ __forceinline__ T saturate_cast(const ushort4& v) {return vec_math_detail::saturate_cast_helper(v);} +template static __device__ __forceinline__ T saturate_cast(const short4& v) {return vec_math_detail::saturate_cast_helper(v);} +template static __device__ __forceinline__ T saturate_cast(const uint4& v) {return vec_math_detail::saturate_cast_helper(v);} +template static __device__ __forceinline__ T saturate_cast(const int4& v) {return vec_math_detail::saturate_cast_helper(v);} +template static __device__ __forceinline__ T saturate_cast(const float4& v) {return vec_math_detail::saturate_cast_helper(v);} +template static __device__ __forceinline__ T saturate_cast(const double4& v) {return vec_math_detail::saturate_cast_helper(v);} + +// unary operators + +#define CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(op, input_type, output_type) \ + __device__ __forceinline__ output_type ## 1 operator op(const input_type ## 1 & a) \ + { \ + return VecTraits::make(op (a.x)); \ + } \ + __device__ __forceinline__ output_type ## 2 operator op(const input_type ## 2 & a) \ + { \ + return VecTraits::make(op (a.x), op (a.y)); \ + } \ + __device__ __forceinline__ output_type ## 3 operator op(const input_type ## 3 & a) \ + { \ + return VecTraits::make(op (a.x), op (a.y), op (a.z)); \ + } \ + __device__ __forceinline__ output_type ## 4 operator op(const input_type ## 4 & a) \ + { \ + return VecTraits::make(op (a.x), op (a.y), op (a.z), op (a.w)); \ + } + +CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(-, char, char) +CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(-, short, short) +CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(-, int, int) +CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(-, float, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(-, double, double) + +CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, uchar, uchar) +CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, char, uchar) +CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, ushort, uchar) +CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, short, uchar) +CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, int, uchar) +CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, uint, uchar) +CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, float, uchar) +CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, double, uchar) + +CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(~, uchar, uchar) +CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(~, char, char) +CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(~, ushort, ushort) +CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(~, short, short) +CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(~, int, int) +CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(~, uint, uint) + +#undef CV_CUDEV_IMPLEMENT_VEC_UNARY_OP + +// unary functions + +#define CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(func_name, func, input_type, output_type) \ + __device__ __forceinline__ output_type ## 1 func_name(const input_type ## 1 & a) \ + { \ + return VecTraits::make(func (a.x)); \ + } \ + __device__ __forceinline__ output_type ## 2 func_name(const input_type ## 2 & a) \ + { \ + return VecTraits::make(func (a.x), func (a.y)); \ + } \ + __device__ __forceinline__ output_type ## 3 func_name(const input_type ## 3 & a) \ + { \ + return VecTraits::make(func (a.x), func (a.y), func (a.z)); \ + } \ + __device__ __forceinline__ output_type ## 4 func_name(const input_type ## 4 & a) \ + { \ + return VecTraits::make(func (a.x), func (a.y), func (a.z), func (a.w)); \ + } + +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, ::fabsf, float, float) + +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrtf, uchar, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrtf, char, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrtf, ushort, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrtf, short, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrtf, int, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrtf, uint, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrtf, float, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrt, double, double) + +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::expf, uchar, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::expf, char, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::expf, ushort, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::expf, short, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::expf, int, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::expf, uint, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::expf, float, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::exp, double, double) + +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2f, uchar, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2f, char, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2f, ushort, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2f, short, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2f, int, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2f, uint, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2f, float, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2, double, double) + +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10f, uchar, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10f, char, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10f, ushort, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10f, short, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10f, int, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10f, uint, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10f, float, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10, double, double) + +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::logf, uchar, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::logf, char, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::logf, ushort, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::logf, short, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::logf, int, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::logf, uint, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::logf, float, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::log, double, double) + +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2f, uchar, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2f, char, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2f, ushort, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2f, short, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2f, int, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2f, uint, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2f, float, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2, double, double) + +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10f, uchar, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10f, char, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10f, ushort, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10f, short, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10f, int, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10f, uint, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10f, float, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10, double, double) + +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sinf, uchar, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sinf, char, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sinf, ushort, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sinf, short, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sinf, int, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sinf, uint, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sinf, float, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sin, double, double) + +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cosf, uchar, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cosf, char, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cosf, ushort, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cosf, short, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cosf, int, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cosf, uint, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cosf, float, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cos, double, double) + +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tanf, uchar, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tanf, char, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tanf, ushort, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tanf, short, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tanf, int, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tanf, uint, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tanf, float, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tan, double, double) + +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asinf, uchar, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asinf, char, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asinf, ushort, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asinf, short, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asinf, int, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asinf, uint, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asinf, float, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asin, double, double) + +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acosf, uchar, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acosf, char, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acosf, ushort, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acosf, short, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acosf, int, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acosf, uint, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acosf, float, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acos, double, double) + +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atanf, uchar, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atanf, char, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atanf, ushort, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atanf, short, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atanf, int, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atanf, uint, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atanf, float, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atan, double, double) + +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinhf, uchar, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinhf, char, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinhf, ushort, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinhf, short, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinhf, int, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinhf, uint, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinhf, float, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinh, double, double) + +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::coshf, uchar, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::coshf, char, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::coshf, ushort, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::coshf, short, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::coshf, int, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::coshf, uint, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::coshf, float, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::cosh, double, double) + +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanhf, uchar, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanhf, char, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanhf, ushort, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanhf, short, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanhf, int, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanhf, uint, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanhf, float, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanh, double, double) + +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinhf, uchar, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinhf, char, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinhf, ushort, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinhf, short, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinhf, int, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinhf, uint, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinhf, float, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinh, double, double) + +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acoshf, uchar, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acoshf, char, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acoshf, ushort, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acoshf, short, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acoshf, int, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acoshf, uint, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acoshf, float, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acosh, double, double) + +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanhf, uchar, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanhf, char, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanhf, ushort, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanhf, short, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanhf, int, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanhf, uint, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanhf, float, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanh, double, double) + +#undef CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC + +// binary operators (vec & vec) + +#define CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(op, input_type, output_type) \ + __device__ __forceinline__ output_type ## 1 operator op(const input_type ## 1 & a, const input_type ## 1 & b) \ + { \ + return VecTraits::make(a.x op b.x); \ + } \ + __device__ __forceinline__ output_type ## 2 operator op(const input_type ## 2 & a, const input_type ## 2 & b) \ + { \ + return VecTraits::make(a.x op b.x, a.y op b.y); \ + } \ + __device__ __forceinline__ output_type ## 3 operator op(const input_type ## 3 & a, const input_type ## 3 & b) \ + { \ + return VecTraits::make(a.x op b.x, a.y op b.y, a.z op b.z); \ + } \ + __device__ __forceinline__ output_type ## 4 operator op(const input_type ## 4 & a, const input_type ## 4 & b) \ + { \ + return VecTraits::make(a.x op b.x, a.y op b.y, a.z op b.z, a.w op b.w); \ + } + +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, uchar, int) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, char, int) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, ushort, int) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, short, int) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, int, int) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, uint, uint) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, float, float) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, double, double) + +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, uchar, int) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, char, int) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, ushort, int) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, short, int) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, int, int) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, uint, uint) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, float, float) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, double, double) + +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, uchar, int) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, char, int) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, ushort, int) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, short, int) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, int, int) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, uint, uint) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, float, float) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, double, double) + +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, uchar, int) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, char, int) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, ushort, int) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, short, int) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, int, int) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, uint, uint) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, float, float) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, double, double) + +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, uchar, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, char, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, ushort, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, short, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, int, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, uint, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, float, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, double, uchar) + +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, uchar, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, char, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, ushort, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, short, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, int, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, uint, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, float, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, double, uchar) + +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, uchar, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, char, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, ushort, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, short, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, int, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, uint, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, float, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, double, uchar) + +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, uchar, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, char, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, ushort, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, short, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, int, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, uint, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, float, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, double, uchar) + +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, uchar, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, char, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, ushort, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, short, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, int, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, uint, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, float, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, double, uchar) + +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, uchar, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, char, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, ushort, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, short, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, int, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, uint, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, float, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, double, uchar) + +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, uchar, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, char, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, ushort, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, short, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, int, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, uint, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, float, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, double, uchar) + +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, uchar, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, char, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, ushort, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, short, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, int, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, uint, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, float, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, double, uchar) + +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, uchar, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, char, char) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, ushort, ushort) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, short, short) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, int, int) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, uint, uint) + +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, uchar, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, char, char) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, ushort, ushort) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, short, short) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, int, int) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, uint, uint) + +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, uchar, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, char, char) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, ushort, ushort) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, short, short) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, int, int) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, uint, uint) + +#undef CV_CUDEV_IMPLEMENT_VEC_BINARY_OP + +// binary operators (vec & scalar) + +#define CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(op, input_type, scalar_type, output_type) \ + __device__ __forceinline__ output_type ## 1 operator op(const input_type ## 1 & a, scalar_type s) \ + { \ + return VecTraits::make(a.x op s); \ + } \ + __device__ __forceinline__ output_type ## 1 operator op(scalar_type s, const input_type ## 1 & b) \ + { \ + return VecTraits::make(s op b.x); \ + } \ + __device__ __forceinline__ output_type ## 2 operator op(const input_type ## 2 & a, scalar_type s) \ + { \ + return VecTraits::make(a.x op s, a.y op s); \ + } \ + __device__ __forceinline__ output_type ## 2 operator op(scalar_type s, const input_type ## 2 & b) \ + { \ + return VecTraits::make(s op b.x, s op b.y); \ + } \ + __device__ __forceinline__ output_type ## 3 operator op(const input_type ## 3 & a, scalar_type s) \ + { \ + return VecTraits::make(a.x op s, a.y op s, a.z op s); \ + } \ + __device__ __forceinline__ output_type ## 3 operator op(scalar_type s, const input_type ## 3 & b) \ + { \ + return VecTraits::make(s op b.x, s op b.y, s op b.z); \ + } \ + __device__ __forceinline__ output_type ## 4 operator op(const input_type ## 4 & a, scalar_type s) \ + { \ + return VecTraits::make(a.x op s, a.y op s, a.z op s, a.w op s); \ + } \ + __device__ __forceinline__ output_type ## 4 operator op(scalar_type s, const input_type ## 4 & b) \ + { \ + return VecTraits::make(s op b.x, s op b.y, s op b.z, s op b.w); \ + } + +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, uchar, int, int) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, uchar, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, uchar, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, char, int, int) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, char, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, char, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, ushort, int, int) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, ushort, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, ushort, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, short, int, int) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, short, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, short, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, int, int, int) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, int, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, int, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, uint, uint, uint) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, uint, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, uint, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, float, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, float, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, double, double, double) + +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, uchar, int, int) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, uchar, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, uchar, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, char, int, int) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, char, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, char, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, ushort, int, int) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, ushort, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, ushort, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, short, int, int) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, short, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, short, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, int, int, int) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, int, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, int, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, uint, uint, uint) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, uint, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, uint, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, float, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, float, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, double, double, double) + +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, uchar, int, int) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, uchar, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, uchar, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, char, int, int) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, char, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, char, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, ushort, int, int) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, ushort, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, ushort, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, short, int, int) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, short, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, short, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, int, int, int) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, int, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, int, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, uint, uint, uint) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, uint, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, uint, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, float, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, float, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, double, double, double) + +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, uchar, int, int) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, uchar, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, uchar, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, char, int, int) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, char, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, char, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, ushort, int, int) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, ushort, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, ushort, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, short, int, int) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, short, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, short, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, int, int, int) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, int, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, int, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, uint, uint, uint) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, uint, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, uint, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, float, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, float, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, double, double, double) + +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, uchar, uchar, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, char, char, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, ushort, ushort, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, short, short, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, int, int, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, uint, uint, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, float, float, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, double, double, uchar) + +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, uchar, uchar, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, char, char, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, ushort, ushort, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, short, short, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, int, int, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, uint, uint, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, float, float, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, double, double, uchar) + +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, uchar, uchar, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, char, char, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, ushort, ushort, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, short, short, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, int, int, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, uint, uint, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, float, float, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, double, double, uchar) + +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, uchar, uchar, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, char, char, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, ushort, ushort, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, short, short, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, int, int, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, uint, uint, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, float, float, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, double, double, uchar) + +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, uchar, uchar, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, char, char, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, ushort, ushort, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, short, short, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, int, int, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, uint, uint, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, float, float, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, double, double, uchar) + +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, uchar, uchar, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, char, char, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, ushort, ushort, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, short, short, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, int, int, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, uint, uint, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, float, float, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, double, double, uchar) + +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, uchar, uchar, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, char, char, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, ushort, ushort, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, short, short, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, int, int, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, uint, uint, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, float, float, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, double, double, uchar) + +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, uchar, uchar, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, char, char, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, ushort, ushort, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, short, short, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, int, int, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, uint, uint, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, float, float, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, double, double, uchar) + +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&, uchar, uchar, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&, char, char, char) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&, ushort, ushort, ushort) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&, short, short, short) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&, int, int, int) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&, uint, uint, uint) + +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(|, uchar, uchar, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(|, char, char, char) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(|, ushort, ushort, ushort) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(|, short, short, short) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(|, int, int, int) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(|, uint, uint, uint) + +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(^, uchar, uchar, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(^, char, char, char) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(^, ushort, ushort, ushort) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(^, short, short, short) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(^, int, int, int) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(^, uint, uint, uint) + +#undef CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP + +// binary function (vec & vec) + +#define CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(func_name, func, input_type, output_type) \ + __device__ __forceinline__ output_type ## 1 func_name(const input_type ## 1 & a, const input_type ## 1 & b) \ + { \ + return VecTraits::make(func (a.x, b.x)); \ + } \ + __device__ __forceinline__ output_type ## 2 func_name(const input_type ## 2 & a, const input_type ## 2 & b) \ + { \ + return VecTraits::make(func (a.x, b.x), func (a.y, b.y)); \ + } \ + __device__ __forceinline__ output_type ## 3 func_name(const input_type ## 3 & a, const input_type ## 3 & b) \ + { \ + return VecTraits::make(func (a.x, b.x), func (a.y, b.y), func (a.z, b.z)); \ + } \ + __device__ __forceinline__ output_type ## 4 func_name(const input_type ## 4 & a, const input_type ## 4 & b) \ + { \ + return VecTraits::make(func (a.x, b.x), func (a.y, b.y), func (a.z, b.z), func (a.w, b.w)); \ + } + +CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::max, uchar, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::max, char, char) +CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::max, ushort, ushort) +CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::max, short, short) +CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::max, uint, uint) +CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::max, int, int) +CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::fmaxf, float, float) +CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::fmax, double, double) + +CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::min, uchar, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::min, char, char) +CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::min, ushort, ushort) +CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::min, short, short) +CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::min, uint, uint) +CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::min, int, int) +CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::fminf, float, float) +CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::fmin, double, double) + +CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypotf, uchar, float) +CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypotf, char, float) +CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypotf, ushort, float) +CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypotf, short, float) +CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypotf, uint, float) +CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypotf, int, float) +CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypotf, float, float) +CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypot, double, double) + +CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2f, uchar, float) +CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2f, char, float) +CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2f, ushort, float) +CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2f, short, float) +CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2f, uint, float) +CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2f, int, float) +CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2f, float, float) +CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2, double, double) + +#undef CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC + +// binary function (vec & scalar) + +#define CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(func_name, func, input_type, scalar_type, output_type) \ + __device__ __forceinline__ output_type ## 1 func_name(const input_type ## 1 & a, scalar_type s) \ + { \ + return VecTraits::make(func ((output_type) a.x, (output_type) s)); \ + } \ + __device__ __forceinline__ output_type ## 1 func_name(scalar_type s, const input_type ## 1 & b) \ + { \ + return VecTraits::make(func ((output_type) s, (output_type) b.x)); \ + } \ + __device__ __forceinline__ output_type ## 2 func_name(const input_type ## 2 & a, scalar_type s) \ + { \ + return VecTraits::make(func ((output_type) a.x, (output_type) s), func ((output_type) a.y, (output_type) s)); \ + } \ + __device__ __forceinline__ output_type ## 2 func_name(scalar_type s, const input_type ## 2 & b) \ + { \ + return VecTraits::make(func ((output_type) s, (output_type) b.x), func ((output_type) s, (output_type) b.y)); \ + } \ + __device__ __forceinline__ output_type ## 3 func_name(const input_type ## 3 & a, scalar_type s) \ + { \ + return VecTraits::make(func ((output_type) a.x, (output_type) s), func ((output_type) a.y, (output_type) s), func ((output_type) a.z, (output_type) s)); \ + } \ + __device__ __forceinline__ output_type ## 3 func_name(scalar_type s, const input_type ## 3 & b) \ + { \ + return VecTraits::make(func ((output_type) s, (output_type) b.x), func ((output_type) s, (output_type) b.y), func ((output_type) s, (output_type) b.z)); \ + } \ + __device__ __forceinline__ output_type ## 4 func_name(const input_type ## 4 & a, scalar_type s) \ + { \ + return VecTraits::make(func ((output_type) a.x, (output_type) s), func ((output_type) a.y, (output_type) s), func ((output_type) a.z, (output_type) s), func ((output_type) a.w, (output_type) s)); \ + } \ + __device__ __forceinline__ output_type ## 4 func_name(scalar_type s, const input_type ## 4 & b) \ + { \ + return VecTraits::make(func ((output_type) s, (output_type) b.x), func ((output_type) s, (output_type) b.y), func ((output_type) s, (output_type) b.z), func ((output_type) s, (output_type) b.w)); \ + } + +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::max, uchar, uchar, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmaxf, uchar, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, uchar, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::max, char, char, char) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmaxf, char, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, char, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::max, ushort, ushort, ushort) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmaxf, ushort, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, ushort, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::max, short, short, short) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmaxf, short, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, short, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::max, uint, uint, uint) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmaxf, uint, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, uint, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::max, int, int, int) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmaxf, int, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, int, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmaxf, float, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, float, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, double, double, double) + +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::min, uchar, uchar, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fminf, uchar, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, uchar, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::min, char, char, char) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fminf, char, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, char, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::min, ushort, ushort, ushort) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fminf, ushort, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, ushort, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::min, short, short, short) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fminf, short, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, short, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::min, uint, uint, uint) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fminf, uint, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, uint, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::min, int, int, int) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fminf, int, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, int, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fminf, float, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, float, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, double, double, double) + +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypotf, uchar, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, uchar, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypotf, char, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, char, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypotf, ushort, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, ushort, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypotf, short, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, short, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypotf, uint, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, uint, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypotf, int, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, int, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypotf, float, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, float, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, double, double, double) + +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2f, uchar, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, uchar, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2f, char, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, char, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2f, ushort, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, ushort, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2f, short, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, short, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2f, uint, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, uint, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2f, int, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, int, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2f, float, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, float, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, double, double, double) + +#undef CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC + +}}} // namespace cv { namespace cuda { namespace device + +//! @endcond + +#endif // OPENCV_CUDA_VECMATH_HPP diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/cuda/vec_traits.hpp b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/cuda/vec_traits.hpp new file mode 100644 index 0000000..b5ff281 --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/cuda/vec_traits.hpp @@ -0,0 +1,288 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef OPENCV_CUDA_VEC_TRAITS_HPP +#define OPENCV_CUDA_VEC_TRAITS_HPP + +#include "common.hpp" + +/** @file + * @deprecated Use @ref cudev instead. + */ + +//! @cond IGNORED + +namespace cv { namespace cuda { namespace device +{ + template struct TypeVec; + + struct __align__(8) uchar8 + { + uchar a0, a1, a2, a3, a4, a5, a6, a7; + }; + static __host__ __device__ __forceinline__ uchar8 make_uchar8(uchar a0, uchar a1, uchar a2, uchar a3, uchar a4, uchar a5, uchar a6, uchar a7) + { + uchar8 val = {a0, a1, a2, a3, a4, a5, a6, a7}; + return val; + } + struct __align__(8) char8 + { + schar a0, a1, a2, a3, a4, a5, a6, a7; + }; + static __host__ __device__ __forceinline__ char8 make_char8(schar a0, schar a1, schar a2, schar a3, schar a4, schar a5, schar a6, schar a7) + { + char8 val = {a0, a1, a2, a3, a4, a5, a6, a7}; + return val; + } + struct __align__(16) ushort8 + { + ushort a0, a1, a2, a3, a4, a5, a6, a7; + }; + static __host__ __device__ __forceinline__ ushort8 make_ushort8(ushort a0, ushort a1, ushort a2, ushort a3, ushort a4, ushort a5, ushort a6, ushort a7) + { + ushort8 val = {a0, a1, a2, a3, a4, a5, a6, a7}; + return val; + } + struct __align__(16) short8 + { + short a0, a1, a2, a3, a4, a5, a6, a7; + }; + static __host__ __device__ __forceinline__ short8 make_short8(short a0, short a1, short a2, short a3, short a4, short a5, short a6, short a7) + { + short8 val = {a0, a1, a2, a3, a4, a5, a6, a7}; + return val; + } + struct __align__(32) uint8 + { + uint a0, a1, a2, a3, a4, a5, a6, a7; + }; + static __host__ __device__ __forceinline__ uint8 make_uint8(uint a0, uint a1, uint a2, uint a3, uint a4, uint a5, uint a6, uint a7) + { + uint8 val = {a0, a1, a2, a3, a4, a5, a6, a7}; + return val; + } + struct __align__(32) int8 + { + int a0, a1, a2, a3, a4, a5, a6, a7; + }; + static __host__ __device__ __forceinline__ int8 make_int8(int a0, int a1, int a2, int a3, int a4, int a5, int a6, int a7) + { + int8 val = {a0, a1, a2, a3, a4, a5, a6, a7}; + return val; + } + struct __align__(32) float8 + { + float a0, a1, a2, a3, a4, a5, a6, a7; + }; + static __host__ __device__ __forceinline__ float8 make_float8(float a0, float a1, float a2, float a3, float a4, float a5, float a6, float a7) + { + float8 val = {a0, a1, a2, a3, a4, a5, a6, a7}; + return val; + } + struct double8 + { + double a0, a1, a2, a3, a4, a5, a6, a7; + }; + static __host__ __device__ __forceinline__ double8 make_double8(double a0, double a1, double a2, double a3, double a4, double a5, double a6, double a7) + { + double8 val = {a0, a1, a2, a3, a4, a5, a6, a7}; + return val; + } + +#define OPENCV_CUDA_IMPLEMENT_TYPE_VEC(type) \ + template<> struct TypeVec { typedef type vec_type; }; \ + template<> struct TypeVec { typedef type ## 1 vec_type; }; \ + template<> struct TypeVec { typedef type ## 2 vec_type; }; \ + template<> struct TypeVec { typedef type ## 2 vec_type; }; \ + template<> struct TypeVec { typedef type ## 3 vec_type; }; \ + template<> struct TypeVec { typedef type ## 3 vec_type; }; \ + template<> struct TypeVec { typedef type ## 4 vec_type; }; \ + template<> struct TypeVec { typedef type ## 4 vec_type; }; \ + template<> struct TypeVec { typedef type ## 8 vec_type; }; \ + template<> struct TypeVec { typedef type ## 8 vec_type; }; + + OPENCV_CUDA_IMPLEMENT_TYPE_VEC(uchar) + OPENCV_CUDA_IMPLEMENT_TYPE_VEC(char) + OPENCV_CUDA_IMPLEMENT_TYPE_VEC(ushort) + OPENCV_CUDA_IMPLEMENT_TYPE_VEC(short) + OPENCV_CUDA_IMPLEMENT_TYPE_VEC(int) + OPENCV_CUDA_IMPLEMENT_TYPE_VEC(uint) + OPENCV_CUDA_IMPLEMENT_TYPE_VEC(float) + OPENCV_CUDA_IMPLEMENT_TYPE_VEC(double) + + #undef OPENCV_CUDA_IMPLEMENT_TYPE_VEC + + template<> struct TypeVec { typedef schar vec_type; }; + template<> struct TypeVec { typedef char2 vec_type; }; + template<> struct TypeVec { typedef char3 vec_type; }; + template<> struct TypeVec { typedef char4 vec_type; }; + template<> struct TypeVec { typedef char8 vec_type; }; + + template<> struct TypeVec { typedef uchar vec_type; }; + template<> struct TypeVec { typedef uchar2 vec_type; }; + template<> struct TypeVec { typedef uchar3 vec_type; }; + template<> struct TypeVec { typedef uchar4 vec_type; }; + template<> struct TypeVec { typedef uchar8 vec_type; }; + + template struct VecTraits; + +#define OPENCV_CUDA_IMPLEMENT_VEC_TRAITS(type) \ + template<> struct VecTraits \ + { \ + typedef type elem_type; \ + enum {cn=1}; \ + static __device__ __host__ __forceinline__ type all(type v) {return v;} \ + static __device__ __host__ __forceinline__ type make(type x) {return x;} \ + static __device__ __host__ __forceinline__ type make(const type* v) {return *v;} \ + }; \ + template<> struct VecTraits \ + { \ + typedef type elem_type; \ + enum {cn=1}; \ + static __device__ __host__ __forceinline__ type ## 1 all(type v) {return make_ ## type ## 1(v);} \ + static __device__ __host__ __forceinline__ type ## 1 make(type x) {return make_ ## type ## 1(x);} \ + static __device__ __host__ __forceinline__ type ## 1 make(const type* v) {return make_ ## type ## 1(*v);} \ + }; \ + template<> struct VecTraits \ + { \ + typedef type elem_type; \ + enum {cn=2}; \ + static __device__ __host__ __forceinline__ type ## 2 all(type v) {return make_ ## type ## 2(v, v);} \ + static __device__ __host__ __forceinline__ type ## 2 make(type x, type y) {return make_ ## type ## 2(x, y);} \ + static __device__ __host__ __forceinline__ type ## 2 make(const type* v) {return make_ ## type ## 2(v[0], v[1]);} \ + }; \ + template<> struct VecTraits \ + { \ + typedef type elem_type; \ + enum {cn=3}; \ + static __device__ __host__ __forceinline__ type ## 3 all(type v) {return make_ ## type ## 3(v, v, v);} \ + static __device__ __host__ __forceinline__ type ## 3 make(type x, type y, type z) {return make_ ## type ## 3(x, y, z);} \ + static __device__ __host__ __forceinline__ type ## 3 make(const type* v) {return make_ ## type ## 3(v[0], v[1], v[2]);} \ + }; \ + template<> struct VecTraits \ + { \ + typedef type elem_type; \ + enum {cn=4}; \ + static __device__ __host__ __forceinline__ type ## 4 all(type v) {return make_ ## type ## 4(v, v, v, v);} \ + static __device__ __host__ __forceinline__ type ## 4 make(type x, type y, type z, type w) {return make_ ## type ## 4(x, y, z, w);} \ + static __device__ __host__ __forceinline__ type ## 4 make(const type* v) {return make_ ## type ## 4(v[0], v[1], v[2], v[3]);} \ + }; \ + template<> struct VecTraits \ + { \ + typedef type elem_type; \ + enum {cn=8}; \ + static __device__ __host__ __forceinline__ type ## 8 all(type v) {return make_ ## type ## 8(v, v, v, v, v, v, v, v);} \ + static __device__ __host__ __forceinline__ type ## 8 make(type a0, type a1, type a2, type a3, type a4, type a5, type a6, type a7) {return make_ ## type ## 8(a0, a1, a2, a3, a4, a5, a6, a7);} \ + static __device__ __host__ __forceinline__ type ## 8 make(const type* v) {return make_ ## type ## 8(v[0], v[1], v[2], v[3], v[4], v[5], v[6], v[7]);} \ + }; + + OPENCV_CUDA_IMPLEMENT_VEC_TRAITS(uchar) + OPENCV_CUDA_IMPLEMENT_VEC_TRAITS(ushort) + OPENCV_CUDA_IMPLEMENT_VEC_TRAITS(short) + OPENCV_CUDA_IMPLEMENT_VEC_TRAITS(int) + OPENCV_CUDA_IMPLEMENT_VEC_TRAITS(uint) + OPENCV_CUDA_IMPLEMENT_VEC_TRAITS(float) + OPENCV_CUDA_IMPLEMENT_VEC_TRAITS(double) + + #undef OPENCV_CUDA_IMPLEMENT_VEC_TRAITS + + template<> struct VecTraits + { + typedef char elem_type; + enum {cn=1}; + static __device__ __host__ __forceinline__ char all(char v) {return v;} + static __device__ __host__ __forceinline__ char make(char x) {return x;} + static __device__ __host__ __forceinline__ char make(const char* x) {return *x;} + }; + template<> struct VecTraits + { + typedef schar elem_type; + enum {cn=1}; + static __device__ __host__ __forceinline__ schar all(schar v) {return v;} + static __device__ __host__ __forceinline__ schar make(schar x) {return x;} + static __device__ __host__ __forceinline__ schar make(const schar* x) {return *x;} + }; + template<> struct VecTraits + { + typedef schar elem_type; + enum {cn=1}; + static __device__ __host__ __forceinline__ char1 all(schar v) {return make_char1(v);} + static __device__ __host__ __forceinline__ char1 make(schar x) {return make_char1(x);} + static __device__ __host__ __forceinline__ char1 make(const schar* v) {return make_char1(v[0]);} + }; + template<> struct VecTraits + { + typedef schar elem_type; + enum {cn=2}; + static __device__ __host__ __forceinline__ char2 all(schar v) {return make_char2(v, v);} + static __device__ __host__ __forceinline__ char2 make(schar x, schar y) {return make_char2(x, y);} + static __device__ __host__ __forceinline__ char2 make(const schar* v) {return make_char2(v[0], v[1]);} + }; + template<> struct VecTraits + { + typedef schar elem_type; + enum {cn=3}; + static __device__ __host__ __forceinline__ char3 all(schar v) {return make_char3(v, v, v);} + static __device__ __host__ __forceinline__ char3 make(schar x, schar y, schar z) {return make_char3(x, y, z);} + static __device__ __host__ __forceinline__ char3 make(const schar* v) {return make_char3(v[0], v[1], v[2]);} + }; + template<> struct VecTraits + { + typedef schar elem_type; + enum {cn=4}; + static __device__ __host__ __forceinline__ char4 all(schar v) {return make_char4(v, v, v, v);} + static __device__ __host__ __forceinline__ char4 make(schar x, schar y, schar z, schar w) {return make_char4(x, y, z, w);} + static __device__ __host__ __forceinline__ char4 make(const schar* v) {return make_char4(v[0], v[1], v[2], v[3]);} + }; + template<> struct VecTraits + { + typedef schar elem_type; + enum {cn=8}; + static __device__ __host__ __forceinline__ char8 all(schar v) {return make_char8(v, v, v, v, v, v, v, v);} + static __device__ __host__ __forceinline__ char8 make(schar a0, schar a1, schar a2, schar a3, schar a4, schar a5, schar a6, schar a7) {return make_char8(a0, a1, a2, a3, a4, a5, a6, a7);} + static __device__ __host__ __forceinline__ char8 make(const schar* v) {return make_char8(v[0], v[1], v[2], v[3], v[4], v[5], v[6], v[7]);} + }; +}}} // namespace cv { namespace cuda { namespace cudev + +//! @endcond + +#endif // OPENCV_CUDA_VEC_TRAITS_HPP diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/cuda/warp.hpp b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/cuda/warp.hpp new file mode 100644 index 0000000..8af7e6a --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/cuda/warp.hpp @@ -0,0 +1,139 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef OPENCV_CUDA_DEVICE_WARP_HPP +#define OPENCV_CUDA_DEVICE_WARP_HPP + +/** @file + * @deprecated Use @ref cudev instead. + */ + +//! @cond IGNORED + +namespace cv { namespace cuda { namespace device +{ + struct Warp + { + enum + { + LOG_WARP_SIZE = 5, + WARP_SIZE = 1 << LOG_WARP_SIZE, + STRIDE = WARP_SIZE + }; + + /** \brief Returns the warp lane ID of the calling thread. */ + static __device__ __forceinline__ unsigned int laneId() + { + unsigned int ret; + asm("mov.u32 %0, %%laneid;" : "=r"(ret) ); + return ret; + } + + template + static __device__ __forceinline__ void fill(It beg, It end, const T& value) + { + for(It t = beg + laneId(); t < end; t += STRIDE) + *t = value; + } + + template + static __device__ __forceinline__ OutIt copy(InIt beg, InIt end, OutIt out) + { + for(InIt t = beg + laneId(); t < end; t += STRIDE, out += STRIDE) + *out = *t; + return out; + } + + template + static __device__ __forceinline__ OutIt transform(InIt beg, InIt end, OutIt out, UnOp op) + { + for(InIt t = beg + laneId(); t < end; t += STRIDE, out += STRIDE) + *out = op(*t); + return out; + } + + template + static __device__ __forceinline__ OutIt transform(InIt1 beg1, InIt1 end1, InIt2 beg2, OutIt out, BinOp op) + { + unsigned int lane = laneId(); + + InIt1 t1 = beg1 + lane; + InIt2 t2 = beg2 + lane; + for(; t1 < end1; t1 += STRIDE, t2 += STRIDE, out += STRIDE) + *out = op(*t1, *t2); + return out; + } + + template + static __device__ __forceinline__ T reduce(volatile T *ptr, BinOp op) + { + const unsigned int lane = laneId(); + + if (lane < 16) + { + T partial = ptr[lane]; + + ptr[lane] = partial = op(partial, ptr[lane + 16]); + ptr[lane] = partial = op(partial, ptr[lane + 8]); + ptr[lane] = partial = op(partial, ptr[lane + 4]); + ptr[lane] = partial = op(partial, ptr[lane + 2]); + ptr[lane] = partial = op(partial, ptr[lane + 1]); + } + + return *ptr; + } + + template + static __device__ __forceinline__ void yota(OutIt beg, OutIt end, T value) + { + unsigned int lane = laneId(); + value += lane; + + for(OutIt t = beg + lane; t < end; t += STRIDE, value += STRIDE) + *t = value; + } + }; +}}} // namespace cv { namespace cuda { namespace cudev + +//! @endcond + +#endif /* OPENCV_CUDA_DEVICE_WARP_HPP */ diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/cuda/warp_reduce.hpp b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/cuda/warp_reduce.hpp new file mode 100644 index 0000000..530303d --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/cuda/warp_reduce.hpp @@ -0,0 +1,76 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef OPENCV_CUDA_WARP_REDUCE_HPP__ +#define OPENCV_CUDA_WARP_REDUCE_HPP__ + +/** @file + * @deprecated Use @ref cudev instead. + */ + +//! @cond IGNORED + +namespace cv { namespace cuda { namespace device +{ + template + __device__ __forceinline__ T warp_reduce(volatile T *ptr , const unsigned int tid = threadIdx.x) + { + const unsigned int lane = tid & 31; // index of thread in warp (0..31) + + if (lane < 16) + { + T partial = ptr[tid]; + + ptr[tid] = partial = partial + ptr[tid + 16]; + ptr[tid] = partial = partial + ptr[tid + 8]; + ptr[tid] = partial = partial + ptr[tid + 4]; + ptr[tid] = partial = partial + ptr[tid + 2]; + ptr[tid] = partial = partial + ptr[tid + 1]; + } + + return ptr[tid - lane]; + } +}}} // namespace cv { namespace cuda { namespace cudev { + +//! @endcond + +#endif /* OPENCV_CUDA_WARP_REDUCE_HPP__ */ diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/cuda/warp_shuffle.hpp b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/cuda/warp_shuffle.hpp new file mode 100644 index 0000000..0da54ae --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/cuda/warp_shuffle.hpp @@ -0,0 +1,162 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef OPENCV_CUDA_WARP_SHUFFLE_HPP +#define OPENCV_CUDA_WARP_SHUFFLE_HPP + +/** @file + * @deprecated Use @ref cudev instead. + */ + +//! @cond IGNORED + +namespace cv { namespace cuda { namespace device +{ +#if __CUDACC_VER_MAJOR__ >= 9 +# define __shfl(x, y, z) __shfl_sync(0xFFFFFFFFU, x, y, z) +# define __shfl_up(x, y, z) __shfl_up_sync(0xFFFFFFFFU, x, y, z) +# define __shfl_down(x, y, z) __shfl_down_sync(0xFFFFFFFFU, x, y, z) +#endif + template + __device__ __forceinline__ T shfl(T val, int srcLane, int width = warpSize) + { + #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300 + return __shfl(val, srcLane, width); + #else + return T(); + #endif + } + __device__ __forceinline__ unsigned int shfl(unsigned int val, int srcLane, int width = warpSize) + { + #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300 + return (unsigned int) __shfl((int) val, srcLane, width); + #else + return 0; + #endif + } + __device__ __forceinline__ double shfl(double val, int srcLane, int width = warpSize) + { + #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300 + int lo = __double2loint(val); + int hi = __double2hiint(val); + + lo = __shfl(lo, srcLane, width); + hi = __shfl(hi, srcLane, width); + + return __hiloint2double(hi, lo); + #else + return 0.0; + #endif + } + + template + __device__ __forceinline__ T shfl_down(T val, unsigned int delta, int width = warpSize) + { + #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300 + return __shfl_down(val, delta, width); + #else + return T(); + #endif + } + __device__ __forceinline__ unsigned int shfl_down(unsigned int val, unsigned int delta, int width = warpSize) + { + #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300 + return (unsigned int) __shfl_down((int) val, delta, width); + #else + return 0; + #endif + } + __device__ __forceinline__ double shfl_down(double val, unsigned int delta, int width = warpSize) + { + #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300 + int lo = __double2loint(val); + int hi = __double2hiint(val); + + lo = __shfl_down(lo, delta, width); + hi = __shfl_down(hi, delta, width); + + return __hiloint2double(hi, lo); + #else + return 0.0; + #endif + } + + template + __device__ __forceinline__ T shfl_up(T val, unsigned int delta, int width = warpSize) + { + #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300 + return __shfl_up(val, delta, width); + #else + return T(); + #endif + } + __device__ __forceinline__ unsigned int shfl_up(unsigned int val, unsigned int delta, int width = warpSize) + { + #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300 + return (unsigned int) __shfl_up((int) val, delta, width); + #else + return 0; + #endif + } + __device__ __forceinline__ double shfl_up(double val, unsigned int delta, int width = warpSize) + { + #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300 + int lo = __double2loint(val); + int hi = __double2hiint(val); + + lo = __shfl_up(lo, delta, width); + hi = __shfl_up(hi, delta, width); + + return __hiloint2double(hi, lo); + #else + return 0.0; + #endif + } +}}} + +# undef __shfl +# undef __shfl_up +# undef __shfl_down + +//! @endcond + +#endif // OPENCV_CUDA_WARP_SHUFFLE_HPP diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/cuda_stream_accessor.hpp b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/cuda_stream_accessor.hpp new file mode 100644 index 0000000..deaf356 --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/cuda_stream_accessor.hpp @@ -0,0 +1,86 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef OPENCV_CORE_CUDA_STREAM_ACCESSOR_HPP +#define OPENCV_CORE_CUDA_STREAM_ACCESSOR_HPP + +#ifndef __cplusplus +# error cuda_stream_accessor.hpp header must be compiled as C++ +#endif + +/** @file cuda_stream_accessor.hpp + * This is only header file that depends on CUDA Runtime API. All other headers are independent. + */ + +#include +#include "opencv2/core/cuda.hpp" + +namespace cv +{ + namespace cuda + { + +//! @addtogroup cudacore_struct +//! @{ + + /** @brief Class that enables getting cudaStream_t from cuda::Stream + */ + struct StreamAccessor + { + CV_EXPORTS static cudaStream_t getStream(const Stream& stream); + CV_EXPORTS static Stream wrapStream(cudaStream_t stream); + }; + + /** @brief Class that enables getting cudaEvent_t from cuda::Event + */ + struct EventAccessor + { + CV_EXPORTS static cudaEvent_t getEvent(const Event& event); + CV_EXPORTS static Event wrapEvent(cudaEvent_t event); + }; + +//! @} + + } +} + +#endif /* OPENCV_CORE_CUDA_STREAM_ACCESSOR_HPP */ diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/cuda_types.hpp b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/cuda_types.hpp new file mode 100644 index 0000000..b33f061 --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/cuda_types.hpp @@ -0,0 +1,144 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef OPENCV_CORE_CUDA_TYPES_HPP +#define OPENCV_CORE_CUDA_TYPES_HPP + +#ifndef __cplusplus +# error cuda_types.hpp header must be compiled as C++ +#endif + +#if defined(__OPENCV_BUILD) && defined(__clang__) +#pragma clang diagnostic ignored "-Winconsistent-missing-override" +#endif +#if defined(__OPENCV_BUILD) && defined(__GNUC__) && __GNUC__ >= 5 +#pragma GCC diagnostic ignored "-Wsuggest-override" +#endif + +/** @file + * @deprecated Use @ref cudev instead. + */ + +//! @cond IGNORED + +#ifdef __CUDACC__ + #define __CV_CUDA_HOST_DEVICE__ __host__ __device__ __forceinline__ +#else + #define __CV_CUDA_HOST_DEVICE__ +#endif + +namespace cv +{ + namespace cuda + { + + // Simple lightweight structures that encapsulates information about an image on device. + // It is intended to pass to nvcc-compiled code. GpuMat depends on headers that nvcc can't compile + + template struct DevPtr + { + typedef T elem_type; + typedef int index_type; + + enum { elem_size = sizeof(elem_type) }; + + T* data; + + __CV_CUDA_HOST_DEVICE__ DevPtr() : data(0) {} + __CV_CUDA_HOST_DEVICE__ DevPtr(T* data_) : data(data_) {} + + __CV_CUDA_HOST_DEVICE__ size_t elemSize() const { return elem_size; } + __CV_CUDA_HOST_DEVICE__ operator T*() { return data; } + __CV_CUDA_HOST_DEVICE__ operator const T*() const { return data; } + }; + + template struct PtrSz : public DevPtr + { + __CV_CUDA_HOST_DEVICE__ PtrSz() : size(0) {} + __CV_CUDA_HOST_DEVICE__ PtrSz(T* data_, size_t size_) : DevPtr(data_), size(size_) {} + + size_t size; + }; + + template struct PtrStep : public DevPtr + { + __CV_CUDA_HOST_DEVICE__ PtrStep() : step(0) {} + __CV_CUDA_HOST_DEVICE__ PtrStep(T* data_, size_t step_) : DevPtr(data_), step(step_) {} + + size_t step; + + __CV_CUDA_HOST_DEVICE__ T* ptr(int y = 0) { return ( T*)( ( char*)(((DevPtr*)this)->data) + y * step); } + __CV_CUDA_HOST_DEVICE__ const T* ptr(int y = 0) const { return (const T*)( (const char*)(((DevPtr*)this)->data) + y * step); } + + __CV_CUDA_HOST_DEVICE__ T& operator ()(int y, int x) { return ptr(y)[x]; } + __CV_CUDA_HOST_DEVICE__ const T& operator ()(int y, int x) const { return ptr(y)[x]; } + }; + + template struct PtrStepSz : public PtrStep + { + __CV_CUDA_HOST_DEVICE__ PtrStepSz() : cols(0), rows(0) {} + __CV_CUDA_HOST_DEVICE__ PtrStepSz(int rows_, int cols_, T* data_, size_t step_) + : PtrStep(data_, step_), cols(cols_), rows(rows_) {} + + template + explicit PtrStepSz(const PtrStepSz& d) : PtrStep((T*)d.data, d.step), cols(d.cols), rows(d.rows){} + + int cols; + int rows; + }; + + typedef PtrStepSz PtrStepSzb; + typedef PtrStepSz PtrStepSzus; + typedef PtrStepSz PtrStepSzf; + typedef PtrStepSz PtrStepSzi; + + typedef PtrStep PtrStepb; + typedef PtrStep PtrStepus; + typedef PtrStep PtrStepf; + typedef PtrStep PtrStepi; + + } +} + +//! @endcond + +#endif /* OPENCV_CORE_CUDA_TYPES_HPP */ diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/cv_cpu_dispatch.h b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/cv_cpu_dispatch.h new file mode 100644 index 0000000..fe15e51 --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/cv_cpu_dispatch.h @@ -0,0 +1,359 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#if defined __OPENCV_BUILD \ + +#include "cv_cpu_config.h" +#include "cv_cpu_helper.h" + +#ifdef CV_CPU_DISPATCH_MODE +#define CV_CPU_OPTIMIZATION_NAMESPACE __CV_CAT(opt_, CV_CPU_DISPATCH_MODE) +#define CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN namespace __CV_CAT(opt_, CV_CPU_DISPATCH_MODE) { +#define CV_CPU_OPTIMIZATION_NAMESPACE_END } +#else +#define CV_CPU_OPTIMIZATION_NAMESPACE cpu_baseline +#define CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN namespace cpu_baseline { +#define CV_CPU_OPTIMIZATION_NAMESPACE_END } +#define CV_CPU_BASELINE_MODE 1 +#endif + + +#define __CV_CPU_DISPATCH_CHAIN_END(fn, args, mode, ...) /* done */ +#define __CV_CPU_DISPATCH(fn, args, mode, ...) __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) +#define __CV_CPU_DISPATCH_EXPAND(fn, args, ...) __CV_EXPAND(__CV_CPU_DISPATCH(fn, args, __VA_ARGS__)) +#define CV_CPU_DISPATCH(fn, args, ...) __CV_CPU_DISPATCH_EXPAND(fn, args, __VA_ARGS__, END) // expand macros + + +#if defined CV_ENABLE_INTRINSICS \ + && !defined CV_DISABLE_OPTIMIZATION \ + && !defined __CUDACC__ /* do not include SSE/AVX/NEON headers for NVCC compiler */ \ + +#ifdef CV_CPU_COMPILE_SSE2 +# include +# define CV_MMX 1 +# define CV_SSE 1 +# define CV_SSE2 1 +#endif +#ifdef CV_CPU_COMPILE_SSE3 +# include +# define CV_SSE3 1 +#endif +#ifdef CV_CPU_COMPILE_SSSE3 +# include +# define CV_SSSE3 1 +#endif +#ifdef CV_CPU_COMPILE_SSE4_1 +# include +# define CV_SSE4_1 1 +#endif +#ifdef CV_CPU_COMPILE_SSE4_2 +# include +# define CV_SSE4_2 1 +#endif +#ifdef CV_CPU_COMPILE_POPCNT +# ifdef _MSC_VER +# include +# if defined(_M_X64) +# define CV_POPCNT_U64 _mm_popcnt_u64 +# endif +# define CV_POPCNT_U32 _mm_popcnt_u32 +# else +# include +# if defined(__x86_64__) +# define CV_POPCNT_U64 __builtin_popcountll +# endif +# define CV_POPCNT_U32 __builtin_popcount +# endif +# define CV_POPCNT 1 +#endif +#ifdef CV_CPU_COMPILE_AVX +# include +# define CV_AVX 1 +#endif +#ifdef CV_CPU_COMPILE_FP16 +# if defined(__arm__) || defined(__aarch64__) || defined(_M_ARM) || defined(_M_ARM64) +# include +# else +# include +# endif +# define CV_FP16 1 +#endif +#ifdef CV_CPU_COMPILE_AVX2 +# include +# define CV_AVX2 1 +#endif +#ifdef CV_CPU_COMPILE_AVX_512F +# include +# define CV_AVX_512F 1 +#endif +#ifdef CV_CPU_COMPILE_AVX512_COMMON +# define CV_AVX512_COMMON 1 +# define CV_AVX_512CD 1 +#endif +#ifdef CV_CPU_COMPILE_AVX512_KNL +# define CV_AVX512_KNL 1 +# define CV_AVX_512ER 1 +# define CV_AVX_512PF 1 +#endif +#ifdef CV_CPU_COMPILE_AVX512_KNM +# define CV_AVX512_KNM 1 +# define CV_AVX_5124FMAPS 1 +# define CV_AVX_5124VNNIW 1 +# define CV_AVX_512VPOPCNTDQ 1 +#endif +#ifdef CV_CPU_COMPILE_AVX512_SKX +# define CV_AVX512_SKX 1 +# define CV_AVX_512VL 1 +# define CV_AVX_512BW 1 +# define CV_AVX_512DQ 1 +#endif +#ifdef CV_CPU_COMPILE_AVX512_CNL +# define CV_AVX512_CNL 1 +# define CV_AVX_512IFMA 1 +# define CV_AVX_512VBMI 1 +#endif +#ifdef CV_CPU_COMPILE_AVX512_CLX +# define CV_AVX512_CLX 1 +# define CV_AVX_512VNNI 1 +#endif +#ifdef CV_CPU_COMPILE_AVX512_ICL +# define CV_AVX512_ICL 1 +# undef CV_AVX_512IFMA +# define CV_AVX_512IFMA 1 +# undef CV_AVX_512VBMI +# define CV_AVX_512VBMI 1 +# undef CV_AVX_512VNNI +# define CV_AVX_512VNNI 1 +# define CV_AVX_512VBMI2 1 +# define CV_AVX_512BITALG 1 +# define CV_AVX_512VPOPCNTDQ 1 +#endif +#ifdef CV_CPU_COMPILE_FMA3 +# define CV_FMA3 1 +#endif + +#if defined _WIN32 && (defined(_M_ARM) || defined(_M_ARM64)) && (defined(CV_CPU_COMPILE_NEON) || !defined(_MSC_VER)) +# include +# include +# define CV_NEON 1 +#elif defined(__ARM_NEON__) || (defined (__ARM_NEON) && defined(__aarch64__)) +# include +# define CV_NEON 1 +#endif + +#if defined(__ARM_NEON__) || defined(__aarch64__) +# include +#endif + +#ifdef CV_CPU_COMPILE_VSX +# include +# undef vector +# undef pixel +# undef bool +# define CV_VSX 1 +#endif + +#ifdef CV_CPU_COMPILE_VSX3 +# define CV_VSX3 1 +#endif + +#ifdef CV_CPU_COMPILE_MSA +# include "hal/msa_macros.h" +# define CV_MSA 1 +#endif + +#ifdef __EMSCRIPTEN__ +# define CV_WASM_SIMD 1 +# include +#endif + +#if defined CV_CPU_COMPILE_RVV +# define CV_RVV 1 +# include +#endif + +#endif // CV_ENABLE_INTRINSICS && !CV_DISABLE_OPTIMIZATION && !__CUDACC__ + +#if defined CV_CPU_COMPILE_AVX && !defined CV_CPU_BASELINE_COMPILE_AVX +struct VZeroUpperGuard { +#ifdef __GNUC__ + __attribute__((always_inline)) +#endif + inline VZeroUpperGuard() { _mm256_zeroupper(); } +#ifdef __GNUC__ + __attribute__((always_inline)) +#endif + inline ~VZeroUpperGuard() { _mm256_zeroupper(); } +}; +#define __CV_AVX_GUARD VZeroUpperGuard __vzeroupper_guard; CV_UNUSED(__vzeroupper_guard); +#endif + +#ifdef __CV_AVX_GUARD +#define CV_AVX_GUARD __CV_AVX_GUARD +#else +#define CV_AVX_GUARD +#endif + +#endif // __OPENCV_BUILD + + + +#if !defined __OPENCV_BUILD /* Compatibility code */ \ + && !defined __CUDACC__ /* do not include SSE/AVX/NEON headers for NVCC compiler */ +#if defined __SSE2__ || defined _M_X64 || (defined _M_IX86_FP && _M_IX86_FP >= 2) +# include +# define CV_MMX 1 +# define CV_SSE 1 +# define CV_SSE2 1 +#elif defined _WIN32 && (defined(_M_ARM) || defined(_M_ARM64)) && (defined(CV_CPU_COMPILE_NEON) || !defined(_MSC_VER)) +# include +# include +# define CV_NEON 1 +#elif defined(__ARM_NEON__) || (defined (__ARM_NEON) && defined(__aarch64__)) +# include +# define CV_NEON 1 +#elif defined(__VSX__) && defined(__PPC64__) && defined(__LITTLE_ENDIAN__) +# include +# undef vector +# undef pixel +# undef bool +# define CV_VSX 1 +#endif + +#ifdef __F16C__ +# include +# define CV_FP16 1 +#endif + +#endif // !__OPENCV_BUILD && !__CUDACC (Compatibility code) + + + +#ifndef CV_MMX +# define CV_MMX 0 +#endif +#ifndef CV_SSE +# define CV_SSE 0 +#endif +#ifndef CV_SSE2 +# define CV_SSE2 0 +#endif +#ifndef CV_SSE3 +# define CV_SSE3 0 +#endif +#ifndef CV_SSSE3 +# define CV_SSSE3 0 +#endif +#ifndef CV_SSE4_1 +# define CV_SSE4_1 0 +#endif +#ifndef CV_SSE4_2 +# define CV_SSE4_2 0 +#endif +#ifndef CV_POPCNT +# define CV_POPCNT 0 +#endif +#ifndef CV_AVX +# define CV_AVX 0 +#endif +#ifndef CV_FP16 +# define CV_FP16 0 +#endif +#ifndef CV_AVX2 +# define CV_AVX2 0 +#endif +#ifndef CV_FMA3 +# define CV_FMA3 0 +#endif +#ifndef CV_AVX_512F +# define CV_AVX_512F 0 +#endif +#ifndef CV_AVX_512BW +# define CV_AVX_512BW 0 +#endif +#ifndef CV_AVX_512CD +# define CV_AVX_512CD 0 +#endif +#ifndef CV_AVX_512DQ +# define CV_AVX_512DQ 0 +#endif +#ifndef CV_AVX_512ER +# define CV_AVX_512ER 0 +#endif +#ifndef CV_AVX_512IFMA +# define CV_AVX_512IFMA 0 +#endif +#define CV_AVX_512IFMA512 CV_AVX_512IFMA // deprecated +#ifndef CV_AVX_512PF +# define CV_AVX_512PF 0 +#endif +#ifndef CV_AVX_512VBMI +# define CV_AVX_512VBMI 0 +#endif +#ifndef CV_AVX_512VL +# define CV_AVX_512VL 0 +#endif +#ifndef CV_AVX_5124FMAPS +# define CV_AVX_5124FMAPS 0 +#endif +#ifndef CV_AVX_5124VNNIW +# define CV_AVX_5124VNNIW 0 +#endif +#ifndef CV_AVX_512VPOPCNTDQ +# define CV_AVX_512VPOPCNTDQ 0 +#endif +#ifndef CV_AVX_512VNNI +# define CV_AVX_512VNNI 0 +#endif +#ifndef CV_AVX_512VBMI2 +# define CV_AVX_512VBMI2 0 +#endif +#ifndef CV_AVX_512BITALG +# define CV_AVX_512BITALG 0 +#endif +#ifndef CV_AVX512_COMMON +# define CV_AVX512_COMMON 0 +#endif +#ifndef CV_AVX512_KNL +# define CV_AVX512_KNL 0 +#endif +#ifndef CV_AVX512_KNM +# define CV_AVX512_KNM 0 +#endif +#ifndef CV_AVX512_SKX +# define CV_AVX512_SKX 0 +#endif +#ifndef CV_AVX512_CNL +# define CV_AVX512_CNL 0 +#endif +#ifndef CV_AVX512_CLX +# define CV_AVX512_CLX 0 +#endif +#ifndef CV_AVX512_ICL +# define CV_AVX512_ICL 0 +#endif + +#ifndef CV_NEON +# define CV_NEON 0 +#endif + +#ifndef CV_VSX +# define CV_VSX 0 +#endif + +#ifndef CV_VSX3 +# define CV_VSX3 0 +#endif + +#ifndef CV_MSA +# define CV_MSA 0 +#endif + +#ifndef CV_WASM_SIMD +# define CV_WASM_SIMD 0 +#endif + +#ifndef CV_RVV +# define CV_RVV 0 +#endif diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/cv_cpu_helper.h b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/cv_cpu_helper.h new file mode 100644 index 0000000..39ae0b9 --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/cv_cpu_helper.h @@ -0,0 +1,508 @@ +// AUTOGENERATED, DO NOT EDIT + +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSE +# define CV_TRY_SSE 1 +# define CV_CPU_FORCE_SSE 1 +# define CV_CPU_HAS_SUPPORT_SSE 1 +# define CV_CPU_CALL_SSE(fn, args) return (cpu_baseline::fn args) +# define CV_CPU_CALL_SSE_(fn, args) return (opt_SSE::fn args) +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSE +# define CV_TRY_SSE 1 +# define CV_CPU_FORCE_SSE 0 +# define CV_CPU_HAS_SUPPORT_SSE (cv::checkHardwareSupport(CV_CPU_SSE)) +# define CV_CPU_CALL_SSE(fn, args) if (CV_CPU_HAS_SUPPORT_SSE) return (opt_SSE::fn args) +# define CV_CPU_CALL_SSE_(fn, args) if (CV_CPU_HAS_SUPPORT_SSE) return (opt_SSE::fn args) +#else +# define CV_TRY_SSE 0 +# define CV_CPU_FORCE_SSE 0 +# define CV_CPU_HAS_SUPPORT_SSE 0 +# define CV_CPU_CALL_SSE(fn, args) +# define CV_CPU_CALL_SSE_(fn, args) +#endif +#define __CV_CPU_DISPATCH_CHAIN_SSE(fn, args, mode, ...) CV_CPU_CALL_SSE(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) + +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSE2 +# define CV_TRY_SSE2 1 +# define CV_CPU_FORCE_SSE2 1 +# define CV_CPU_HAS_SUPPORT_SSE2 1 +# define CV_CPU_CALL_SSE2(fn, args) return (cpu_baseline::fn args) +# define CV_CPU_CALL_SSE2_(fn, args) return (opt_SSE2::fn args) +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSE2 +# define CV_TRY_SSE2 1 +# define CV_CPU_FORCE_SSE2 0 +# define CV_CPU_HAS_SUPPORT_SSE2 (cv::checkHardwareSupport(CV_CPU_SSE2)) +# define CV_CPU_CALL_SSE2(fn, args) if (CV_CPU_HAS_SUPPORT_SSE2) return (opt_SSE2::fn args) +# define CV_CPU_CALL_SSE2_(fn, args) if (CV_CPU_HAS_SUPPORT_SSE2) return (opt_SSE2::fn args) +#else +# define CV_TRY_SSE2 0 +# define CV_CPU_FORCE_SSE2 0 +# define CV_CPU_HAS_SUPPORT_SSE2 0 +# define CV_CPU_CALL_SSE2(fn, args) +# define CV_CPU_CALL_SSE2_(fn, args) +#endif +#define __CV_CPU_DISPATCH_CHAIN_SSE2(fn, args, mode, ...) CV_CPU_CALL_SSE2(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) + +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSE3 +# define CV_TRY_SSE3 1 +# define CV_CPU_FORCE_SSE3 1 +# define CV_CPU_HAS_SUPPORT_SSE3 1 +# define CV_CPU_CALL_SSE3(fn, args) return (cpu_baseline::fn args) +# define CV_CPU_CALL_SSE3_(fn, args) return (opt_SSE3::fn args) +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSE3 +# define CV_TRY_SSE3 1 +# define CV_CPU_FORCE_SSE3 0 +# define CV_CPU_HAS_SUPPORT_SSE3 (cv::checkHardwareSupport(CV_CPU_SSE3)) +# define CV_CPU_CALL_SSE3(fn, args) if (CV_CPU_HAS_SUPPORT_SSE3) return (opt_SSE3::fn args) +# define CV_CPU_CALL_SSE3_(fn, args) if (CV_CPU_HAS_SUPPORT_SSE3) return (opt_SSE3::fn args) +#else +# define CV_TRY_SSE3 0 +# define CV_CPU_FORCE_SSE3 0 +# define CV_CPU_HAS_SUPPORT_SSE3 0 +# define CV_CPU_CALL_SSE3(fn, args) +# define CV_CPU_CALL_SSE3_(fn, args) +#endif +#define __CV_CPU_DISPATCH_CHAIN_SSE3(fn, args, mode, ...) CV_CPU_CALL_SSE3(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) + +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSSE3 +# define CV_TRY_SSSE3 1 +# define CV_CPU_FORCE_SSSE3 1 +# define CV_CPU_HAS_SUPPORT_SSSE3 1 +# define CV_CPU_CALL_SSSE3(fn, args) return (cpu_baseline::fn args) +# define CV_CPU_CALL_SSSE3_(fn, args) return (opt_SSSE3::fn args) +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSSE3 +# define CV_TRY_SSSE3 1 +# define CV_CPU_FORCE_SSSE3 0 +# define CV_CPU_HAS_SUPPORT_SSSE3 (cv::checkHardwareSupport(CV_CPU_SSSE3)) +# define CV_CPU_CALL_SSSE3(fn, args) if (CV_CPU_HAS_SUPPORT_SSSE3) return (opt_SSSE3::fn args) +# define CV_CPU_CALL_SSSE3_(fn, args) if (CV_CPU_HAS_SUPPORT_SSSE3) return (opt_SSSE3::fn args) +#else +# define CV_TRY_SSSE3 0 +# define CV_CPU_FORCE_SSSE3 0 +# define CV_CPU_HAS_SUPPORT_SSSE3 0 +# define CV_CPU_CALL_SSSE3(fn, args) +# define CV_CPU_CALL_SSSE3_(fn, args) +#endif +#define __CV_CPU_DISPATCH_CHAIN_SSSE3(fn, args, mode, ...) CV_CPU_CALL_SSSE3(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) + +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSE4_1 +# define CV_TRY_SSE4_1 1 +# define CV_CPU_FORCE_SSE4_1 1 +# define CV_CPU_HAS_SUPPORT_SSE4_1 1 +# define CV_CPU_CALL_SSE4_1(fn, args) return (cpu_baseline::fn args) +# define CV_CPU_CALL_SSE4_1_(fn, args) return (opt_SSE4_1::fn args) +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSE4_1 +# define CV_TRY_SSE4_1 1 +# define CV_CPU_FORCE_SSE4_1 0 +# define CV_CPU_HAS_SUPPORT_SSE4_1 (cv::checkHardwareSupport(CV_CPU_SSE4_1)) +# define CV_CPU_CALL_SSE4_1(fn, args) if (CV_CPU_HAS_SUPPORT_SSE4_1) return (opt_SSE4_1::fn args) +# define CV_CPU_CALL_SSE4_1_(fn, args) if (CV_CPU_HAS_SUPPORT_SSE4_1) return (opt_SSE4_1::fn args) +#else +# define CV_TRY_SSE4_1 0 +# define CV_CPU_FORCE_SSE4_1 0 +# define CV_CPU_HAS_SUPPORT_SSE4_1 0 +# define CV_CPU_CALL_SSE4_1(fn, args) +# define CV_CPU_CALL_SSE4_1_(fn, args) +#endif +#define __CV_CPU_DISPATCH_CHAIN_SSE4_1(fn, args, mode, ...) CV_CPU_CALL_SSE4_1(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) + +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSE4_2 +# define CV_TRY_SSE4_2 1 +# define CV_CPU_FORCE_SSE4_2 1 +# define CV_CPU_HAS_SUPPORT_SSE4_2 1 +# define CV_CPU_CALL_SSE4_2(fn, args) return (cpu_baseline::fn args) +# define CV_CPU_CALL_SSE4_2_(fn, args) return (opt_SSE4_2::fn args) +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSE4_2 +# define CV_TRY_SSE4_2 1 +# define CV_CPU_FORCE_SSE4_2 0 +# define CV_CPU_HAS_SUPPORT_SSE4_2 (cv::checkHardwareSupport(CV_CPU_SSE4_2)) +# define CV_CPU_CALL_SSE4_2(fn, args) if (CV_CPU_HAS_SUPPORT_SSE4_2) return (opt_SSE4_2::fn args) +# define CV_CPU_CALL_SSE4_2_(fn, args) if (CV_CPU_HAS_SUPPORT_SSE4_2) return (opt_SSE4_2::fn args) +#else +# define CV_TRY_SSE4_2 0 +# define CV_CPU_FORCE_SSE4_2 0 +# define CV_CPU_HAS_SUPPORT_SSE4_2 0 +# define CV_CPU_CALL_SSE4_2(fn, args) +# define CV_CPU_CALL_SSE4_2_(fn, args) +#endif +#define __CV_CPU_DISPATCH_CHAIN_SSE4_2(fn, args, mode, ...) CV_CPU_CALL_SSE4_2(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) + +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_POPCNT +# define CV_TRY_POPCNT 1 +# define CV_CPU_FORCE_POPCNT 1 +# define CV_CPU_HAS_SUPPORT_POPCNT 1 +# define CV_CPU_CALL_POPCNT(fn, args) return (cpu_baseline::fn args) +# define CV_CPU_CALL_POPCNT_(fn, args) return (opt_POPCNT::fn args) +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_POPCNT +# define CV_TRY_POPCNT 1 +# define CV_CPU_FORCE_POPCNT 0 +# define CV_CPU_HAS_SUPPORT_POPCNT (cv::checkHardwareSupport(CV_CPU_POPCNT)) +# define CV_CPU_CALL_POPCNT(fn, args) if (CV_CPU_HAS_SUPPORT_POPCNT) return (opt_POPCNT::fn args) +# define CV_CPU_CALL_POPCNT_(fn, args) if (CV_CPU_HAS_SUPPORT_POPCNT) return (opt_POPCNT::fn args) +#else +# define CV_TRY_POPCNT 0 +# define CV_CPU_FORCE_POPCNT 0 +# define CV_CPU_HAS_SUPPORT_POPCNT 0 +# define CV_CPU_CALL_POPCNT(fn, args) +# define CV_CPU_CALL_POPCNT_(fn, args) +#endif +#define __CV_CPU_DISPATCH_CHAIN_POPCNT(fn, args, mode, ...) CV_CPU_CALL_POPCNT(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) + +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX +# define CV_TRY_AVX 1 +# define CV_CPU_FORCE_AVX 1 +# define CV_CPU_HAS_SUPPORT_AVX 1 +# define CV_CPU_CALL_AVX(fn, args) return (cpu_baseline::fn args) +# define CV_CPU_CALL_AVX_(fn, args) return (opt_AVX::fn args) +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX +# define CV_TRY_AVX 1 +# define CV_CPU_FORCE_AVX 0 +# define CV_CPU_HAS_SUPPORT_AVX (cv::checkHardwareSupport(CV_CPU_AVX)) +# define CV_CPU_CALL_AVX(fn, args) if (CV_CPU_HAS_SUPPORT_AVX) return (opt_AVX::fn args) +# define CV_CPU_CALL_AVX_(fn, args) if (CV_CPU_HAS_SUPPORT_AVX) return (opt_AVX::fn args) +#else +# define CV_TRY_AVX 0 +# define CV_CPU_FORCE_AVX 0 +# define CV_CPU_HAS_SUPPORT_AVX 0 +# define CV_CPU_CALL_AVX(fn, args) +# define CV_CPU_CALL_AVX_(fn, args) +#endif +#define __CV_CPU_DISPATCH_CHAIN_AVX(fn, args, mode, ...) CV_CPU_CALL_AVX(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) + +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_FP16 +# define CV_TRY_FP16 1 +# define CV_CPU_FORCE_FP16 1 +# define CV_CPU_HAS_SUPPORT_FP16 1 +# define CV_CPU_CALL_FP16(fn, args) return (cpu_baseline::fn args) +# define CV_CPU_CALL_FP16_(fn, args) return (opt_FP16::fn args) +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_FP16 +# define CV_TRY_FP16 1 +# define CV_CPU_FORCE_FP16 0 +# define CV_CPU_HAS_SUPPORT_FP16 (cv::checkHardwareSupport(CV_CPU_FP16)) +# define CV_CPU_CALL_FP16(fn, args) if (CV_CPU_HAS_SUPPORT_FP16) return (opt_FP16::fn args) +# define CV_CPU_CALL_FP16_(fn, args) if (CV_CPU_HAS_SUPPORT_FP16) return (opt_FP16::fn args) +#else +# define CV_TRY_FP16 0 +# define CV_CPU_FORCE_FP16 0 +# define CV_CPU_HAS_SUPPORT_FP16 0 +# define CV_CPU_CALL_FP16(fn, args) +# define CV_CPU_CALL_FP16_(fn, args) +#endif +#define __CV_CPU_DISPATCH_CHAIN_FP16(fn, args, mode, ...) CV_CPU_CALL_FP16(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) + +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX2 +# define CV_TRY_AVX2 1 +# define CV_CPU_FORCE_AVX2 1 +# define CV_CPU_HAS_SUPPORT_AVX2 1 +# define CV_CPU_CALL_AVX2(fn, args) return (cpu_baseline::fn args) +# define CV_CPU_CALL_AVX2_(fn, args) return (opt_AVX2::fn args) +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX2 +# define CV_TRY_AVX2 1 +# define CV_CPU_FORCE_AVX2 0 +# define CV_CPU_HAS_SUPPORT_AVX2 (cv::checkHardwareSupport(CV_CPU_AVX2)) +# define CV_CPU_CALL_AVX2(fn, args) if (CV_CPU_HAS_SUPPORT_AVX2) return (opt_AVX2::fn args) +# define CV_CPU_CALL_AVX2_(fn, args) if (CV_CPU_HAS_SUPPORT_AVX2) return (opt_AVX2::fn args) +#else +# define CV_TRY_AVX2 0 +# define CV_CPU_FORCE_AVX2 0 +# define CV_CPU_HAS_SUPPORT_AVX2 0 +# define CV_CPU_CALL_AVX2(fn, args) +# define CV_CPU_CALL_AVX2_(fn, args) +#endif +#define __CV_CPU_DISPATCH_CHAIN_AVX2(fn, args, mode, ...) CV_CPU_CALL_AVX2(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) + +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_FMA3 +# define CV_TRY_FMA3 1 +# define CV_CPU_FORCE_FMA3 1 +# define CV_CPU_HAS_SUPPORT_FMA3 1 +# define CV_CPU_CALL_FMA3(fn, args) return (cpu_baseline::fn args) +# define CV_CPU_CALL_FMA3_(fn, args) return (opt_FMA3::fn args) +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_FMA3 +# define CV_TRY_FMA3 1 +# define CV_CPU_FORCE_FMA3 0 +# define CV_CPU_HAS_SUPPORT_FMA3 (cv::checkHardwareSupport(CV_CPU_FMA3)) +# define CV_CPU_CALL_FMA3(fn, args) if (CV_CPU_HAS_SUPPORT_FMA3) return (opt_FMA3::fn args) +# define CV_CPU_CALL_FMA3_(fn, args) if (CV_CPU_HAS_SUPPORT_FMA3) return (opt_FMA3::fn args) +#else +# define CV_TRY_FMA3 0 +# define CV_CPU_FORCE_FMA3 0 +# define CV_CPU_HAS_SUPPORT_FMA3 0 +# define CV_CPU_CALL_FMA3(fn, args) +# define CV_CPU_CALL_FMA3_(fn, args) +#endif +#define __CV_CPU_DISPATCH_CHAIN_FMA3(fn, args, mode, ...) CV_CPU_CALL_FMA3(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) + +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX_512F +# define CV_TRY_AVX_512F 1 +# define CV_CPU_FORCE_AVX_512F 1 +# define CV_CPU_HAS_SUPPORT_AVX_512F 1 +# define CV_CPU_CALL_AVX_512F(fn, args) return (cpu_baseline::fn args) +# define CV_CPU_CALL_AVX_512F_(fn, args) return (opt_AVX_512F::fn args) +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX_512F +# define CV_TRY_AVX_512F 1 +# define CV_CPU_FORCE_AVX_512F 0 +# define CV_CPU_HAS_SUPPORT_AVX_512F (cv::checkHardwareSupport(CV_CPU_AVX_512F)) +# define CV_CPU_CALL_AVX_512F(fn, args) if (CV_CPU_HAS_SUPPORT_AVX_512F) return (opt_AVX_512F::fn args) +# define CV_CPU_CALL_AVX_512F_(fn, args) if (CV_CPU_HAS_SUPPORT_AVX_512F) return (opt_AVX_512F::fn args) +#else +# define CV_TRY_AVX_512F 0 +# define CV_CPU_FORCE_AVX_512F 0 +# define CV_CPU_HAS_SUPPORT_AVX_512F 0 +# define CV_CPU_CALL_AVX_512F(fn, args) +# define CV_CPU_CALL_AVX_512F_(fn, args) +#endif +#define __CV_CPU_DISPATCH_CHAIN_AVX_512F(fn, args, mode, ...) CV_CPU_CALL_AVX_512F(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) + +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX512_COMMON +# define CV_TRY_AVX512_COMMON 1 +# define CV_CPU_FORCE_AVX512_COMMON 1 +# define CV_CPU_HAS_SUPPORT_AVX512_COMMON 1 +# define CV_CPU_CALL_AVX512_COMMON(fn, args) return (cpu_baseline::fn args) +# define CV_CPU_CALL_AVX512_COMMON_(fn, args) return (opt_AVX512_COMMON::fn args) +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX512_COMMON +# define CV_TRY_AVX512_COMMON 1 +# define CV_CPU_FORCE_AVX512_COMMON 0 +# define CV_CPU_HAS_SUPPORT_AVX512_COMMON (cv::checkHardwareSupport(CV_CPU_AVX512_COMMON)) +# define CV_CPU_CALL_AVX512_COMMON(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_COMMON) return (opt_AVX512_COMMON::fn args) +# define CV_CPU_CALL_AVX512_COMMON_(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_COMMON) return (opt_AVX512_COMMON::fn args) +#else +# define CV_TRY_AVX512_COMMON 0 +# define CV_CPU_FORCE_AVX512_COMMON 0 +# define CV_CPU_HAS_SUPPORT_AVX512_COMMON 0 +# define CV_CPU_CALL_AVX512_COMMON(fn, args) +# define CV_CPU_CALL_AVX512_COMMON_(fn, args) +#endif +#define __CV_CPU_DISPATCH_CHAIN_AVX512_COMMON(fn, args, mode, ...) CV_CPU_CALL_AVX512_COMMON(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) + +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX512_KNL +# define CV_TRY_AVX512_KNL 1 +# define CV_CPU_FORCE_AVX512_KNL 1 +# define CV_CPU_HAS_SUPPORT_AVX512_KNL 1 +# define CV_CPU_CALL_AVX512_KNL(fn, args) return (cpu_baseline::fn args) +# define CV_CPU_CALL_AVX512_KNL_(fn, args) return (opt_AVX512_KNL::fn args) +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX512_KNL +# define CV_TRY_AVX512_KNL 1 +# define CV_CPU_FORCE_AVX512_KNL 0 +# define CV_CPU_HAS_SUPPORT_AVX512_KNL (cv::checkHardwareSupport(CV_CPU_AVX512_KNL)) +# define CV_CPU_CALL_AVX512_KNL(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_KNL) return (opt_AVX512_KNL::fn args) +# define CV_CPU_CALL_AVX512_KNL_(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_KNL) return (opt_AVX512_KNL::fn args) +#else +# define CV_TRY_AVX512_KNL 0 +# define CV_CPU_FORCE_AVX512_KNL 0 +# define CV_CPU_HAS_SUPPORT_AVX512_KNL 0 +# define CV_CPU_CALL_AVX512_KNL(fn, args) +# define CV_CPU_CALL_AVX512_KNL_(fn, args) +#endif +#define __CV_CPU_DISPATCH_CHAIN_AVX512_KNL(fn, args, mode, ...) CV_CPU_CALL_AVX512_KNL(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) + +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX512_KNM +# define CV_TRY_AVX512_KNM 1 +# define CV_CPU_FORCE_AVX512_KNM 1 +# define CV_CPU_HAS_SUPPORT_AVX512_KNM 1 +# define CV_CPU_CALL_AVX512_KNM(fn, args) return (cpu_baseline::fn args) +# define CV_CPU_CALL_AVX512_KNM_(fn, args) return (opt_AVX512_KNM::fn args) +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX512_KNM +# define CV_TRY_AVX512_KNM 1 +# define CV_CPU_FORCE_AVX512_KNM 0 +# define CV_CPU_HAS_SUPPORT_AVX512_KNM (cv::checkHardwareSupport(CV_CPU_AVX512_KNM)) +# define CV_CPU_CALL_AVX512_KNM(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_KNM) return (opt_AVX512_KNM::fn args) +# define CV_CPU_CALL_AVX512_KNM_(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_KNM) return (opt_AVX512_KNM::fn args) +#else +# define CV_TRY_AVX512_KNM 0 +# define CV_CPU_FORCE_AVX512_KNM 0 +# define CV_CPU_HAS_SUPPORT_AVX512_KNM 0 +# define CV_CPU_CALL_AVX512_KNM(fn, args) +# define CV_CPU_CALL_AVX512_KNM_(fn, args) +#endif +#define __CV_CPU_DISPATCH_CHAIN_AVX512_KNM(fn, args, mode, ...) CV_CPU_CALL_AVX512_KNM(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) + +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX512_SKX +# define CV_TRY_AVX512_SKX 1 +# define CV_CPU_FORCE_AVX512_SKX 1 +# define CV_CPU_HAS_SUPPORT_AVX512_SKX 1 +# define CV_CPU_CALL_AVX512_SKX(fn, args) return (cpu_baseline::fn args) +# define CV_CPU_CALL_AVX512_SKX_(fn, args) return (opt_AVX512_SKX::fn args) +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX512_SKX +# define CV_TRY_AVX512_SKX 1 +# define CV_CPU_FORCE_AVX512_SKX 0 +# define CV_CPU_HAS_SUPPORT_AVX512_SKX (cv::checkHardwareSupport(CV_CPU_AVX512_SKX)) +# define CV_CPU_CALL_AVX512_SKX(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_SKX) return (opt_AVX512_SKX::fn args) +# define CV_CPU_CALL_AVX512_SKX_(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_SKX) return (opt_AVX512_SKX::fn args) +#else +# define CV_TRY_AVX512_SKX 0 +# define CV_CPU_FORCE_AVX512_SKX 0 +# define CV_CPU_HAS_SUPPORT_AVX512_SKX 0 +# define CV_CPU_CALL_AVX512_SKX(fn, args) +# define CV_CPU_CALL_AVX512_SKX_(fn, args) +#endif +#define __CV_CPU_DISPATCH_CHAIN_AVX512_SKX(fn, args, mode, ...) CV_CPU_CALL_AVX512_SKX(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) + +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX512_CNL +# define CV_TRY_AVX512_CNL 1 +# define CV_CPU_FORCE_AVX512_CNL 1 +# define CV_CPU_HAS_SUPPORT_AVX512_CNL 1 +# define CV_CPU_CALL_AVX512_CNL(fn, args) return (cpu_baseline::fn args) +# define CV_CPU_CALL_AVX512_CNL_(fn, args) return (opt_AVX512_CNL::fn args) +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX512_CNL +# define CV_TRY_AVX512_CNL 1 +# define CV_CPU_FORCE_AVX512_CNL 0 +# define CV_CPU_HAS_SUPPORT_AVX512_CNL (cv::checkHardwareSupport(CV_CPU_AVX512_CNL)) +# define CV_CPU_CALL_AVX512_CNL(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_CNL) return (opt_AVX512_CNL::fn args) +# define CV_CPU_CALL_AVX512_CNL_(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_CNL) return (opt_AVX512_CNL::fn args) +#else +# define CV_TRY_AVX512_CNL 0 +# define CV_CPU_FORCE_AVX512_CNL 0 +# define CV_CPU_HAS_SUPPORT_AVX512_CNL 0 +# define CV_CPU_CALL_AVX512_CNL(fn, args) +# define CV_CPU_CALL_AVX512_CNL_(fn, args) +#endif +#define __CV_CPU_DISPATCH_CHAIN_AVX512_CNL(fn, args, mode, ...) CV_CPU_CALL_AVX512_CNL(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) + +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX512_CLX +# define CV_TRY_AVX512_CLX 1 +# define CV_CPU_FORCE_AVX512_CLX 1 +# define CV_CPU_HAS_SUPPORT_AVX512_CLX 1 +# define CV_CPU_CALL_AVX512_CLX(fn, args) return (cpu_baseline::fn args) +# define CV_CPU_CALL_AVX512_CLX_(fn, args) return (opt_AVX512_CLX::fn args) +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX512_CLX +# define CV_TRY_AVX512_CLX 1 +# define CV_CPU_FORCE_AVX512_CLX 0 +# define CV_CPU_HAS_SUPPORT_AVX512_CLX (cv::checkHardwareSupport(CV_CPU_AVX512_CLX)) +# define CV_CPU_CALL_AVX512_CLX(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_CLX) return (opt_AVX512_CLX::fn args) +# define CV_CPU_CALL_AVX512_CLX_(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_CLX) return (opt_AVX512_CLX::fn args) +#else +# define CV_TRY_AVX512_CLX 0 +# define CV_CPU_FORCE_AVX512_CLX 0 +# define CV_CPU_HAS_SUPPORT_AVX512_CLX 0 +# define CV_CPU_CALL_AVX512_CLX(fn, args) +# define CV_CPU_CALL_AVX512_CLX_(fn, args) +#endif +#define __CV_CPU_DISPATCH_CHAIN_AVX512_CLX(fn, args, mode, ...) CV_CPU_CALL_AVX512_CLX(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) + +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX512_ICL +# define CV_TRY_AVX512_ICL 1 +# define CV_CPU_FORCE_AVX512_ICL 1 +# define CV_CPU_HAS_SUPPORT_AVX512_ICL 1 +# define CV_CPU_CALL_AVX512_ICL(fn, args) return (cpu_baseline::fn args) +# define CV_CPU_CALL_AVX512_ICL_(fn, args) return (opt_AVX512_ICL::fn args) +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX512_ICL +# define CV_TRY_AVX512_ICL 1 +# define CV_CPU_FORCE_AVX512_ICL 0 +# define CV_CPU_HAS_SUPPORT_AVX512_ICL (cv::checkHardwareSupport(CV_CPU_AVX512_ICL)) +# define CV_CPU_CALL_AVX512_ICL(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_ICL) return (opt_AVX512_ICL::fn args) +# define CV_CPU_CALL_AVX512_ICL_(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_ICL) return (opt_AVX512_ICL::fn args) +#else +# define CV_TRY_AVX512_ICL 0 +# define CV_CPU_FORCE_AVX512_ICL 0 +# define CV_CPU_HAS_SUPPORT_AVX512_ICL 0 +# define CV_CPU_CALL_AVX512_ICL(fn, args) +# define CV_CPU_CALL_AVX512_ICL_(fn, args) +#endif +#define __CV_CPU_DISPATCH_CHAIN_AVX512_ICL(fn, args, mode, ...) CV_CPU_CALL_AVX512_ICL(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) + +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_NEON +# define CV_TRY_NEON 1 +# define CV_CPU_FORCE_NEON 1 +# define CV_CPU_HAS_SUPPORT_NEON 1 +# define CV_CPU_CALL_NEON(fn, args) return (cpu_baseline::fn args) +# define CV_CPU_CALL_NEON_(fn, args) return (opt_NEON::fn args) +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_NEON +# define CV_TRY_NEON 1 +# define CV_CPU_FORCE_NEON 0 +# define CV_CPU_HAS_SUPPORT_NEON (cv::checkHardwareSupport(CV_CPU_NEON)) +# define CV_CPU_CALL_NEON(fn, args) if (CV_CPU_HAS_SUPPORT_NEON) return (opt_NEON::fn args) +# define CV_CPU_CALL_NEON_(fn, args) if (CV_CPU_HAS_SUPPORT_NEON) return (opt_NEON::fn args) +#else +# define CV_TRY_NEON 0 +# define CV_CPU_FORCE_NEON 0 +# define CV_CPU_HAS_SUPPORT_NEON 0 +# define CV_CPU_CALL_NEON(fn, args) +# define CV_CPU_CALL_NEON_(fn, args) +#endif +#define __CV_CPU_DISPATCH_CHAIN_NEON(fn, args, mode, ...) CV_CPU_CALL_NEON(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) + +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_MSA +# define CV_TRY_MSA 1 +# define CV_CPU_FORCE_MSA 1 +# define CV_CPU_HAS_SUPPORT_MSA 1 +# define CV_CPU_CALL_MSA(fn, args) return (cpu_baseline::fn args) +# define CV_CPU_CALL_MSA_(fn, args) return (opt_MSA::fn args) +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_MSA +# define CV_TRY_MSA 1 +# define CV_CPU_FORCE_MSA 0 +# define CV_CPU_HAS_SUPPORT_MSA (cv::checkHardwareSupport(CV_CPU_MSA)) +# define CV_CPU_CALL_MSA(fn, args) if (CV_CPU_HAS_SUPPORT_MSA) return (opt_MSA::fn args) +# define CV_CPU_CALL_MSA_(fn, args) if (CV_CPU_HAS_SUPPORT_MSA) return (opt_MSA::fn args) +#else +# define CV_TRY_MSA 0 +# define CV_CPU_FORCE_MSA 0 +# define CV_CPU_HAS_SUPPORT_MSA 0 +# define CV_CPU_CALL_MSA(fn, args) +# define CV_CPU_CALL_MSA_(fn, args) +#endif +#define __CV_CPU_DISPATCH_CHAIN_MSA(fn, args, mode, ...) CV_CPU_CALL_MSA(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) + +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_VSX +# define CV_TRY_VSX 1 +# define CV_CPU_FORCE_VSX 1 +# define CV_CPU_HAS_SUPPORT_VSX 1 +# define CV_CPU_CALL_VSX(fn, args) return (cpu_baseline::fn args) +# define CV_CPU_CALL_VSX_(fn, args) return (opt_VSX::fn args) +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_VSX +# define CV_TRY_VSX 1 +# define CV_CPU_FORCE_VSX 0 +# define CV_CPU_HAS_SUPPORT_VSX (cv::checkHardwareSupport(CV_CPU_VSX)) +# define CV_CPU_CALL_VSX(fn, args) if (CV_CPU_HAS_SUPPORT_VSX) return (opt_VSX::fn args) +# define CV_CPU_CALL_VSX_(fn, args) if (CV_CPU_HAS_SUPPORT_VSX) return (opt_VSX::fn args) +#else +# define CV_TRY_VSX 0 +# define CV_CPU_FORCE_VSX 0 +# define CV_CPU_HAS_SUPPORT_VSX 0 +# define CV_CPU_CALL_VSX(fn, args) +# define CV_CPU_CALL_VSX_(fn, args) +#endif +#define __CV_CPU_DISPATCH_CHAIN_VSX(fn, args, mode, ...) CV_CPU_CALL_VSX(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) + +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_VSX3 +# define CV_TRY_VSX3 1 +# define CV_CPU_FORCE_VSX3 1 +# define CV_CPU_HAS_SUPPORT_VSX3 1 +# define CV_CPU_CALL_VSX3(fn, args) return (cpu_baseline::fn args) +# define CV_CPU_CALL_VSX3_(fn, args) return (opt_VSX3::fn args) +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_VSX3 +# define CV_TRY_VSX3 1 +# define CV_CPU_FORCE_VSX3 0 +# define CV_CPU_HAS_SUPPORT_VSX3 (cv::checkHardwareSupport(CV_CPU_VSX3)) +# define CV_CPU_CALL_VSX3(fn, args) if (CV_CPU_HAS_SUPPORT_VSX3) return (opt_VSX3::fn args) +# define CV_CPU_CALL_VSX3_(fn, args) if (CV_CPU_HAS_SUPPORT_VSX3) return (opt_VSX3::fn args) +#else +# define CV_TRY_VSX3 0 +# define CV_CPU_FORCE_VSX3 0 +# define CV_CPU_HAS_SUPPORT_VSX3 0 +# define CV_CPU_CALL_VSX3(fn, args) +# define CV_CPU_CALL_VSX3_(fn, args) +#endif +#define __CV_CPU_DISPATCH_CHAIN_VSX3(fn, args, mode, ...) CV_CPU_CALL_VSX3(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) + +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_RVV +# define CV_TRY_RVV 1 +# define CV_CPU_FORCE_RVV 1 +# define CV_CPU_HAS_SUPPORT_RVV 1 +# define CV_CPU_CALL_RVV(fn, args) return (cpu_baseline::fn args) +# define CV_CPU_CALL_RVV_(fn, args) return (opt_RVV::fn args) +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_RVV +# define CV_TRY_RVV 1 +# define CV_CPU_FORCE_RVV 0 +# define CV_CPU_HAS_SUPPORT_RVV (cv::checkHardwareSupport(CV_CPU_RVV)) +# define CV_CPU_CALL_RVV(fn, args) if (CV_CPU_HAS_SUPPORT_RVV) return (opt_RVV::fn args) +# define CV_CPU_CALL_RVV_(fn, args) if (CV_CPU_HAS_SUPPORT_RVV) return (opt_RVV::fn args) +#else +# define CV_TRY_RVV 0 +# define CV_CPU_FORCE_RVV 0 +# define CV_CPU_HAS_SUPPORT_RVV 0 +# define CV_CPU_CALL_RVV(fn, args) +# define CV_CPU_CALL_RVV_(fn, args) +#endif +#define __CV_CPU_DISPATCH_CHAIN_RVV(fn, args, mode, ...) CV_CPU_CALL_RVV(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) + +#define CV_CPU_CALL_BASELINE(fn, args) return (cpu_baseline::fn args) +#define __CV_CPU_DISPATCH_CHAIN_BASELINE(fn, args, mode, ...) CV_CPU_CALL_BASELINE(fn, args) /* last in sequence */ diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/cvdef.h b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/cvdef.h new file mode 100644 index 0000000..d001ebc --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/cvdef.h @@ -0,0 +1,924 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Copyright (C) 2013, OpenCV Foundation, all rights reserved. +// Copyright (C) 2015, Itseez Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef OPENCV_CORE_CVDEF_H +#define OPENCV_CORE_CVDEF_H + +//! @addtogroup core_utils +//! @{ + +#ifdef OPENCV_INCLUDE_PORT_FILE // User-provided header file with custom platform configuration +#include OPENCV_INCLUDE_PORT_FILE +#endif + +#if !defined CV_DOXYGEN && !defined CV_IGNORE_DEBUG_BUILD_GUARD +#if (defined(_MSC_VER) && (defined(DEBUG) || defined(_DEBUG))) || \ + (defined(_GLIBCXX_DEBUG) || defined(_GLIBCXX_DEBUG_PEDANTIC)) +// Guard to prevent using of binary incompatible binaries / runtimes +// https://github.com/opencv/opencv/pull/9161 +#define CV__DEBUG_NS_BEGIN namespace debug_build_guard { +#define CV__DEBUG_NS_END } +namespace cv { namespace debug_build_guard { } using namespace debug_build_guard; } +#endif +#endif + +#ifndef CV__DEBUG_NS_BEGIN +#define CV__DEBUG_NS_BEGIN +#define CV__DEBUG_NS_END +#endif + + +#ifdef __OPENCV_BUILD +#include "cvconfig.h" +#endif + +#ifndef __CV_EXPAND +#define __CV_EXPAND(x) x +#endif + +#ifndef __CV_CAT +#define __CV_CAT__(x, y) x ## y +#define __CV_CAT_(x, y) __CV_CAT__(x, y) +#define __CV_CAT(x, y) __CV_CAT_(x, y) +#endif + +#define __CV_VA_NUM_ARGS_HELPER(_1, _2, _3, _4, _5, _6, _7, _8, _9, _10, N, ...) N +#define __CV_VA_NUM_ARGS(...) __CV_EXPAND(__CV_VA_NUM_ARGS_HELPER(__VA_ARGS__, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)) + +#ifdef CV_Func +// keep current value (through OpenCV port file) +#elif defined __GNUC__ || (defined (__cpluscplus) && (__cpluscplus >= 201103)) +#define CV_Func __func__ +#elif defined __clang__ && (__clang_minor__ * 100 + __clang_major__ >= 305) +#define CV_Func __func__ +#elif defined(__STDC_VERSION__) && (__STDC_VERSION >= 199901) +#define CV_Func __func__ +#elif defined _MSC_VER +#define CV_Func __FUNCTION__ +#elif defined(__INTEL_COMPILER) && (_INTEL_COMPILER >= 600) +#define CV_Func __FUNCTION__ +#elif defined __IBMCPP__ && __IBMCPP__ >=500 +#define CV_Func __FUNCTION__ +#elif defined __BORLAND__ && (__BORLANDC__ >= 0x550) +#define CV_Func __FUNC__ +#else +#define CV_Func "" +#endif + +//! @cond IGNORED + +//////////////// static assert ///////////////// +#define CVAUX_CONCAT_EXP(a, b) a##b +#define CVAUX_CONCAT(a, b) CVAUX_CONCAT_EXP(a,b) + +#if defined(__clang__) +# ifndef __has_extension +# define __has_extension __has_feature /* compatibility, for older versions of clang */ +# endif +# if __has_extension(cxx_static_assert) +# define CV_StaticAssert(condition, reason) static_assert((condition), reason " " #condition) +# elif __has_extension(c_static_assert) +# define CV_StaticAssert(condition, reason) _Static_assert((condition), reason " " #condition) +# endif +#elif defined(__GNUC__) +# if (defined(__GXX_EXPERIMENTAL_CXX0X__) || __cplusplus >= 201103L) +# define CV_StaticAssert(condition, reason) static_assert((condition), reason " " #condition) +# endif +#elif defined(_MSC_VER) +# if _MSC_VER >= 1600 /* MSVC 10 */ +# define CV_StaticAssert(condition, reason) static_assert((condition), reason " " #condition) +# endif +#endif +#ifndef CV_StaticAssert +# if !defined(__clang__) && defined(__GNUC__) && (__GNUC__*100 + __GNUC_MINOR__ > 302) +# define CV_StaticAssert(condition, reason) ({ extern int __attribute__((error("CV_StaticAssert: " reason " " #condition))) CV_StaticAssert(); ((condition) ? 0 : CV_StaticAssert()); }) +# else +namespace cv { + template struct CV_StaticAssert_failed; + template <> struct CV_StaticAssert_failed { enum { val = 1 }; }; + template struct CV_StaticAssert_test {}; +} +# define CV_StaticAssert(condition, reason)\ + typedef cv::CV_StaticAssert_test< sizeof(cv::CV_StaticAssert_failed< static_cast(condition) >) > CVAUX_CONCAT(CV_StaticAssert_failed_at_, __LINE__) +# endif +#endif + +// Suppress warning "-Wdeprecated-declarations" / C4996 +#if defined(_MSC_VER) + #define CV_DO_PRAGMA(x) __pragma(x) +#elif defined(__GNUC__) + #define CV_DO_PRAGMA(x) _Pragma (#x) +#else + #define CV_DO_PRAGMA(x) +#endif + +#ifdef _MSC_VER +#define CV_SUPPRESS_DEPRECATED_START \ + CV_DO_PRAGMA(warning(push)) \ + CV_DO_PRAGMA(warning(disable: 4996)) +#define CV_SUPPRESS_DEPRECATED_END CV_DO_PRAGMA(warning(pop)) +#elif defined (__clang__) || ((__GNUC__) && (__GNUC__*100 + __GNUC_MINOR__ > 405)) +#define CV_SUPPRESS_DEPRECATED_START \ + CV_DO_PRAGMA(GCC diagnostic push) \ + CV_DO_PRAGMA(GCC diagnostic ignored "-Wdeprecated-declarations") +#define CV_SUPPRESS_DEPRECATED_END CV_DO_PRAGMA(GCC diagnostic pop) +#else +#define CV_SUPPRESS_DEPRECATED_START +#define CV_SUPPRESS_DEPRECATED_END +#endif + +#define CV_UNUSED(name) (void)name + +//! @endcond + +// undef problematic defines sometimes defined by system headers (windows.h in particular) +#undef small +#undef min +#undef max +#undef abs +#undef Complex + +#if defined __cplusplus +#include +#else +#include +#endif + +#include "opencv2/core/hal/interface.h" + +#if defined __ICL +# define CV_ICC __ICL +#elif defined __ICC +# define CV_ICC __ICC +#elif defined __ECL +# define CV_ICC __ECL +#elif defined __ECC +# define CV_ICC __ECC +#elif defined __INTEL_COMPILER +# define CV_ICC __INTEL_COMPILER +#endif + +#ifndef CV_INLINE +# if defined __cplusplus +# define CV_INLINE static inline +# elif defined _MSC_VER +# define CV_INLINE __inline +# else +# define CV_INLINE static +# endif +#endif + +#ifndef CV_ALWAYS_INLINE +#if defined(__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1)) +#define CV_ALWAYS_INLINE inline __attribute__((always_inline)) +#elif defined(_MSC_VER) +#define CV_ALWAYS_INLINE __forceinline +#else +#define CV_ALWAYS_INLINE inline +#endif +#endif + +#if defined CV_DISABLE_OPTIMIZATION || (defined CV_ICC && !defined CV_ENABLE_UNROLLED) +# define CV_ENABLE_UNROLLED 0 +#else +# define CV_ENABLE_UNROLLED 1 +#endif + +#ifdef __GNUC__ +# define CV_DECL_ALIGNED(x) __attribute__ ((aligned (x))) +#elif defined _MSC_VER +# define CV_DECL_ALIGNED(x) __declspec(align(x)) +#else +# define CV_DECL_ALIGNED(x) +#endif + +/* CPU features and intrinsics support */ +#define CV_CPU_NONE 0 +#define CV_CPU_MMX 1 +#define CV_CPU_SSE 2 +#define CV_CPU_SSE2 3 +#define CV_CPU_SSE3 4 +#define CV_CPU_SSSE3 5 +#define CV_CPU_SSE4_1 6 +#define CV_CPU_SSE4_2 7 +#define CV_CPU_POPCNT 8 +#define CV_CPU_FP16 9 +#define CV_CPU_AVX 10 +#define CV_CPU_AVX2 11 +#define CV_CPU_FMA3 12 + +#define CV_CPU_AVX_512F 13 +#define CV_CPU_AVX_512BW 14 +#define CV_CPU_AVX_512CD 15 +#define CV_CPU_AVX_512DQ 16 +#define CV_CPU_AVX_512ER 17 +#define CV_CPU_AVX_512IFMA512 18 // deprecated +#define CV_CPU_AVX_512IFMA 18 +#define CV_CPU_AVX_512PF 19 +#define CV_CPU_AVX_512VBMI 20 +#define CV_CPU_AVX_512VL 21 +#define CV_CPU_AVX_512VBMI2 22 +#define CV_CPU_AVX_512VNNI 23 +#define CV_CPU_AVX_512BITALG 24 +#define CV_CPU_AVX_512VPOPCNTDQ 25 +#define CV_CPU_AVX_5124VNNIW 26 +#define CV_CPU_AVX_5124FMAPS 27 + +#define CV_CPU_NEON 100 + +#define CV_CPU_MSA 150 + +#define CV_CPU_VSX 200 +#define CV_CPU_VSX3 201 + +#define CV_CPU_RVV 210 + +// CPU features groups +#define CV_CPU_AVX512_SKX 256 +#define CV_CPU_AVX512_COMMON 257 +#define CV_CPU_AVX512_KNL 258 +#define CV_CPU_AVX512_KNM 259 +#define CV_CPU_AVX512_CNL 260 +#define CV_CPU_AVX512_CLX 261 +#define CV_CPU_AVX512_ICL 262 + +// when adding to this list remember to update the following enum +#define CV_HARDWARE_MAX_FEATURE 512 + +/** @brief Available CPU features. +*/ +enum CpuFeatures { + CPU_MMX = 1, + CPU_SSE = 2, + CPU_SSE2 = 3, + CPU_SSE3 = 4, + CPU_SSSE3 = 5, + CPU_SSE4_1 = 6, + CPU_SSE4_2 = 7, + CPU_POPCNT = 8, + CPU_FP16 = 9, + CPU_AVX = 10, + CPU_AVX2 = 11, + CPU_FMA3 = 12, + + CPU_AVX_512F = 13, + CPU_AVX_512BW = 14, + CPU_AVX_512CD = 15, + CPU_AVX_512DQ = 16, + CPU_AVX_512ER = 17, + CPU_AVX_512IFMA512 = 18, // deprecated + CPU_AVX_512IFMA = 18, + CPU_AVX_512PF = 19, + CPU_AVX_512VBMI = 20, + CPU_AVX_512VL = 21, + CPU_AVX_512VBMI2 = 22, + CPU_AVX_512VNNI = 23, + CPU_AVX_512BITALG = 24, + CPU_AVX_512VPOPCNTDQ= 25, + CPU_AVX_5124VNNIW = 26, + CPU_AVX_5124FMAPS = 27, + + CPU_NEON = 100, + + CPU_MSA = 150, + + CPU_VSX = 200, + CPU_VSX3 = 201, + + CPU_RVV = 210, + + CPU_AVX512_SKX = 256, //!< Skylake-X with AVX-512F/CD/BW/DQ/VL + CPU_AVX512_COMMON = 257, //!< Common instructions AVX-512F/CD for all CPUs that support AVX-512 + CPU_AVX512_KNL = 258, //!< Knights Landing with AVX-512F/CD/ER/PF + CPU_AVX512_KNM = 259, //!< Knights Mill with AVX-512F/CD/ER/PF/4FMAPS/4VNNIW/VPOPCNTDQ + CPU_AVX512_CNL = 260, //!< Cannon Lake with AVX-512F/CD/BW/DQ/VL/IFMA/VBMI + CPU_AVX512_CLX = 261, //!< Cascade Lake with AVX-512F/CD/BW/DQ/VL/VNNI + CPU_AVX512_ICL = 262, //!< Ice Lake with AVX-512F/CD/BW/DQ/VL/IFMA/VBMI/VNNI/VBMI2/BITALG/VPOPCNTDQ + + CPU_MAX_FEATURE = 512 // see CV_HARDWARE_MAX_FEATURE +}; + + +#include "cv_cpu_dispatch.h" + +#if !defined(CV_STRONG_ALIGNMENT) && defined(__arm__) && !(defined(__aarch64__) || defined(_M_ARM64)) +// int*, int64* should be propertly aligned pointers on ARMv7 +#define CV_STRONG_ALIGNMENT 1 +#endif +#if !defined(CV_STRONG_ALIGNMENT) +#define CV_STRONG_ALIGNMENT 0 +#endif + +/* fundamental constants */ +#define CV_PI 3.1415926535897932384626433832795 +#define CV_2PI 6.283185307179586476925286766559 +#define CV_LOG2 0.69314718055994530941723212145818 + +#if defined __ARM_FP16_FORMAT_IEEE \ + && !defined __CUDACC__ +# define CV_FP16_TYPE 1 +#else +# define CV_FP16_TYPE 0 +#endif + +typedef union Cv16suf +{ + short i; + ushort u; +#if CV_FP16_TYPE + __fp16 h; +#endif +} +Cv16suf; + +typedef union Cv32suf +{ + int i; + unsigned u; + float f; +} +Cv32suf; + +typedef union Cv64suf +{ + int64 i; + uint64 u; + double f; +} +Cv64suf; + +#define OPENCV_ABI_COMPATIBILITY 400 + +#ifdef __OPENCV_BUILD +# define DISABLE_OPENCV_3_COMPATIBILITY +# define OPENCV_DISABLE_DEPRECATED_COMPATIBILITY +#endif + +#ifndef CV_EXPORTS +# if (defined _WIN32 || defined WINCE || defined __CYGWIN__) && defined(CVAPI_EXPORTS) +# define CV_EXPORTS __declspec(dllexport) +# elif defined __GNUC__ && __GNUC__ >= 4 && (defined(CVAPI_EXPORTS) || defined(__APPLE__)) +# define CV_EXPORTS __attribute__ ((visibility ("default"))) +# endif +#endif + +#ifndef CV_EXPORTS +# define CV_EXPORTS +#endif + +#ifdef _MSC_VER +# define CV_EXPORTS_TEMPLATE +#else +# define CV_EXPORTS_TEMPLATE CV_EXPORTS +#endif + +#ifndef CV_DEPRECATED +# if defined(__GNUC__) +# define CV_DEPRECATED __attribute__ ((deprecated)) +# elif defined(_MSC_VER) +# define CV_DEPRECATED __declspec(deprecated) +# else +# define CV_DEPRECATED +# endif +#endif + +#ifndef CV_DEPRECATED_EXTERNAL +# if defined(__OPENCV_BUILD) +# define CV_DEPRECATED_EXTERNAL /* nothing */ +# else +# define CV_DEPRECATED_EXTERNAL CV_DEPRECATED +# endif +#endif + + +#ifndef CV_EXTERN_C +# ifdef __cplusplus +# define CV_EXTERN_C extern "C" +# else +# define CV_EXTERN_C +# endif +#endif + +/* special informative macros for wrapper generators */ +#define CV_EXPORTS_W CV_EXPORTS +#define CV_EXPORTS_W_SIMPLE CV_EXPORTS +#define CV_EXPORTS_AS(synonym) CV_EXPORTS +#define CV_EXPORTS_W_MAP CV_EXPORTS +#define CV_IN_OUT +#define CV_OUT +#define CV_PROP +#define CV_PROP_RW +#define CV_WRAP +#define CV_WRAP_AS(synonym) +#define CV_WRAP_MAPPABLE(mappable) +#define CV_WRAP_PHANTOM(phantom_header) +#define CV_WRAP_DEFAULT(val) + +/****************************************************************************************\ +* Matrix type (Mat) * +\****************************************************************************************/ + +#define CV_MAT_CN_MASK ((CV_CN_MAX - 1) << CV_CN_SHIFT) +#define CV_MAT_CN(flags) ((((flags) & CV_MAT_CN_MASK) >> CV_CN_SHIFT) + 1) +#define CV_MAT_TYPE_MASK (CV_DEPTH_MAX*CV_CN_MAX - 1) +#define CV_MAT_TYPE(flags) ((flags) & CV_MAT_TYPE_MASK) +#define CV_MAT_CONT_FLAG_SHIFT 14 +#define CV_MAT_CONT_FLAG (1 << CV_MAT_CONT_FLAG_SHIFT) +#define CV_IS_MAT_CONT(flags) ((flags) & CV_MAT_CONT_FLAG) +#define CV_IS_CONT_MAT CV_IS_MAT_CONT +#define CV_SUBMAT_FLAG_SHIFT 15 +#define CV_SUBMAT_FLAG (1 << CV_SUBMAT_FLAG_SHIFT) +#define CV_IS_SUBMAT(flags) ((flags) & CV_MAT_SUBMAT_FLAG) + +/** Size of each channel item, + 0x28442211 = 0010 1000 0100 0100 0010 0010 0001 0001 ~ array of sizeof(arr_type_elem) */ +#define CV_ELEM_SIZE1(type) ((0x28442211 >> CV_MAT_DEPTH(type)*4) & 15) + +#define CV_ELEM_SIZE(type) (CV_MAT_CN(type)*CV_ELEM_SIZE1(type)) + +#ifndef MIN +# define MIN(a,b) ((a) > (b) ? (b) : (a)) +#endif + +#ifndef MAX +# define MAX(a,b) ((a) < (b) ? (b) : (a)) +#endif + +///////////////////////////////////////// Enum operators /////////////////////////////////////// + +/** + +Provides compatibility operators for both classical and C++11 enum classes, +as well as exposing the C++11 enum class members for backwards compatibility + +@code + // Provides operators required for flag enums + CV_ENUM_FLAGS(AccessFlag) + + // Exposes the listed members of the enum class AccessFlag to the current namespace + CV_ENUM_CLASS_EXPOSE(AccessFlag, ACCESS_READ [, ACCESS_WRITE [, ...] ]); +@endcode +*/ + +#define __CV_ENUM_CLASS_EXPOSE_1(EnumType, MEMBER_CONST) \ +static const EnumType MEMBER_CONST = EnumType::MEMBER_CONST; \ + +#define __CV_ENUM_CLASS_EXPOSE_2(EnumType, MEMBER_CONST, ...) \ +__CV_ENUM_CLASS_EXPOSE_1(EnumType, MEMBER_CONST); \ +__CV_EXPAND(__CV_ENUM_CLASS_EXPOSE_1(EnumType, __VA_ARGS__)); \ + +#define __CV_ENUM_CLASS_EXPOSE_3(EnumType, MEMBER_CONST, ...) \ +__CV_ENUM_CLASS_EXPOSE_1(EnumType, MEMBER_CONST); \ +__CV_EXPAND(__CV_ENUM_CLASS_EXPOSE_2(EnumType, __VA_ARGS__)); \ + +#define __CV_ENUM_CLASS_EXPOSE_4(EnumType, MEMBER_CONST, ...) \ +__CV_ENUM_CLASS_EXPOSE_1(EnumType, MEMBER_CONST); \ +__CV_EXPAND(__CV_ENUM_CLASS_EXPOSE_3(EnumType, __VA_ARGS__)); \ + +#define __CV_ENUM_CLASS_EXPOSE_5(EnumType, MEMBER_CONST, ...) \ +__CV_ENUM_CLASS_EXPOSE_1(EnumType, MEMBER_CONST); \ +__CV_EXPAND(__CV_ENUM_CLASS_EXPOSE_4(EnumType, __VA_ARGS__)); \ + +#define __CV_ENUM_CLASS_EXPOSE_6(EnumType, MEMBER_CONST, ...) \ +__CV_ENUM_CLASS_EXPOSE_1(EnumType, MEMBER_CONST); \ +__CV_EXPAND(__CV_ENUM_CLASS_EXPOSE_5(EnumType, __VA_ARGS__)); \ + +#define __CV_ENUM_CLASS_EXPOSE_7(EnumType, MEMBER_CONST, ...) \ +__CV_ENUM_CLASS_EXPOSE_1(EnumType, MEMBER_CONST); \ +__CV_EXPAND(__CV_ENUM_CLASS_EXPOSE_6(EnumType, __VA_ARGS__)); \ + +#define __CV_ENUM_CLASS_EXPOSE_8(EnumType, MEMBER_CONST, ...) \ +__CV_ENUM_CLASS_EXPOSE_1(EnumType, MEMBER_CONST); \ +__CV_EXPAND(__CV_ENUM_CLASS_EXPOSE_7(EnumType, __VA_ARGS__)); \ + +#define __CV_ENUM_CLASS_EXPOSE_9(EnumType, MEMBER_CONST, ...) \ +__CV_ENUM_CLASS_EXPOSE_1(EnumType, MEMBER_CONST); \ +__CV_EXPAND(__CV_ENUM_CLASS_EXPOSE_8(EnumType, __VA_ARGS__)); \ + +#define __CV_ENUM_FLAGS_LOGICAL_NOT(EnumType) \ +static inline bool operator!(const EnumType& val) \ +{ \ + typedef std::underlying_type::type UnderlyingType; \ + return !static_cast(val); \ +} \ + +#define __CV_ENUM_FLAGS_LOGICAL_NOT_EQ(Arg1Type, Arg2Type) \ +static inline bool operator!=(const Arg1Type& a, const Arg2Type& b) \ +{ \ + return static_cast(a) != static_cast(b); \ +} \ + +#define __CV_ENUM_FLAGS_LOGICAL_EQ(Arg1Type, Arg2Type) \ +static inline bool operator==(const Arg1Type& a, const Arg2Type& b) \ +{ \ + return static_cast(a) == static_cast(b); \ +} \ + +#define __CV_ENUM_FLAGS_BITWISE_NOT(EnumType) \ +static inline EnumType operator~(const EnumType& val) \ +{ \ + typedef std::underlying_type::type UnderlyingType; \ + return static_cast(~static_cast(val)); \ +} \ + +#define __CV_ENUM_FLAGS_BITWISE_OR(EnumType, Arg1Type, Arg2Type) \ +static inline EnumType operator|(const Arg1Type& a, const Arg2Type& b) \ +{ \ + typedef std::underlying_type::type UnderlyingType; \ + return static_cast(static_cast(a) | static_cast(b)); \ +} \ + +#define __CV_ENUM_FLAGS_BITWISE_AND(EnumType, Arg1Type, Arg2Type) \ +static inline EnumType operator&(const Arg1Type& a, const Arg2Type& b) \ +{ \ + typedef std::underlying_type::type UnderlyingType; \ + return static_cast(static_cast(a) & static_cast(b)); \ +} \ + +#define __CV_ENUM_FLAGS_BITWISE_XOR(EnumType, Arg1Type, Arg2Type) \ +static inline EnumType operator^(const Arg1Type& a, const Arg2Type& b) \ +{ \ + typedef std::underlying_type::type UnderlyingType; \ + return static_cast(static_cast(a) ^ static_cast(b)); \ +} \ + +#define __CV_ENUM_FLAGS_BITWISE_OR_EQ(EnumType, Arg1Type) \ +static inline EnumType& operator|=(EnumType& _this, const Arg1Type& val) \ +{ \ + _this = static_cast(static_cast(_this) | static_cast(val)); \ + return _this; \ +} \ + +#define __CV_ENUM_FLAGS_BITWISE_AND_EQ(EnumType, Arg1Type) \ +static inline EnumType& operator&=(EnumType& _this, const Arg1Type& val) \ +{ \ + _this = static_cast(static_cast(_this) & static_cast(val)); \ + return _this; \ +} \ + +#define __CV_ENUM_FLAGS_BITWISE_XOR_EQ(EnumType, Arg1Type) \ +static inline EnumType& operator^=(EnumType& _this, const Arg1Type& val) \ +{ \ + _this = static_cast(static_cast(_this) ^ static_cast(val)); \ + return _this; \ +} \ + +#define CV_ENUM_CLASS_EXPOSE(EnumType, ...) \ +__CV_EXPAND(__CV_CAT(__CV_ENUM_CLASS_EXPOSE_, __CV_VA_NUM_ARGS(__VA_ARGS__))(EnumType, __VA_ARGS__)); \ + +#define CV_ENUM_FLAGS(EnumType) \ +__CV_ENUM_FLAGS_LOGICAL_NOT (EnumType) \ +__CV_ENUM_FLAGS_LOGICAL_EQ (EnumType, int) \ +__CV_ENUM_FLAGS_LOGICAL_NOT_EQ (EnumType, int) \ + \ +__CV_ENUM_FLAGS_BITWISE_NOT (EnumType) \ +__CV_ENUM_FLAGS_BITWISE_OR (EnumType, EnumType, EnumType) \ +__CV_ENUM_FLAGS_BITWISE_AND (EnumType, EnumType, EnumType) \ +__CV_ENUM_FLAGS_BITWISE_XOR (EnumType, EnumType, EnumType) \ + \ +__CV_ENUM_FLAGS_BITWISE_OR_EQ (EnumType, EnumType) \ +__CV_ENUM_FLAGS_BITWISE_AND_EQ (EnumType, EnumType) \ +__CV_ENUM_FLAGS_BITWISE_XOR_EQ (EnumType, EnumType) \ + +/****************************************************************************************\ +* static analysys * +\****************************************************************************************/ + +// In practice, some macro are not processed correctly (noreturn is not detected). +// We need to use simplified definition for them. +#ifndef CV_STATIC_ANALYSIS +# if defined(__KLOCWORK__) || defined(__clang_analyzer__) || defined(__COVERITY__) +# define CV_STATIC_ANALYSIS 1 +# endif +#else +# if defined(CV_STATIC_ANALYSIS) && !(__CV_CAT(1, CV_STATIC_ANALYSIS) == 1) // defined and not empty +# if 0 == CV_STATIC_ANALYSIS +# undef CV_STATIC_ANALYSIS +# endif +# endif +#endif + +/****************************************************************************************\ +* Thread sanitizer * +\****************************************************************************************/ +#ifndef CV_THREAD_SANITIZER +# if defined(__has_feature) +# if __has_feature(thread_sanitizer) +# define CV_THREAD_SANITIZER +# endif +# endif +#endif + +/****************************************************************************************\ +* exchange-add operation for atomic operations on reference counters * +\****************************************************************************************/ + +#ifdef CV_XADD + // allow to use user-defined macro +#elif defined __GNUC__ || defined __clang__ +# if defined __clang__ && __clang_major__ >= 3 && !defined __ANDROID__ && !defined __EMSCRIPTEN__ && !defined(__CUDACC__) && !defined __INTEL_COMPILER +# ifdef __ATOMIC_ACQ_REL +# define CV_XADD(addr, delta) __c11_atomic_fetch_add((_Atomic(int)*)(addr), delta, __ATOMIC_ACQ_REL) +# else +# define CV_XADD(addr, delta) __atomic_fetch_add((_Atomic(int)*)(addr), delta, 4) +# endif +# else +# if defined __ATOMIC_ACQ_REL && !defined __clang__ + // version for gcc >= 4.7 +# define CV_XADD(addr, delta) (int)__atomic_fetch_add((unsigned*)(addr), (unsigned)(delta), __ATOMIC_ACQ_REL) +# else +# define CV_XADD(addr, delta) (int)__sync_fetch_and_add((unsigned*)(addr), (unsigned)(delta)) +# endif +# endif +#elif defined _MSC_VER && !defined RC_INVOKED +# include +# define CV_XADD(addr, delta) (int)_InterlockedExchangeAdd((long volatile*)addr, delta) +#else + #ifdef OPENCV_FORCE_UNSAFE_XADD + CV_INLINE CV_XADD(int* addr, int delta) { int tmp = *addr; *addr += delta; return tmp; } + #else + #error "OpenCV: can't define safe CV_XADD macro for current platform (unsupported). Define CV_XADD macro through custom port header (see OPENCV_INCLUDE_PORT_FILE)" + #endif +#endif + + +/****************************************************************************************\ +* CV_NORETURN attribute * +\****************************************************************************************/ + +#ifndef CV_NORETURN +# if defined(__GNUC__) +# define CV_NORETURN __attribute__((__noreturn__)) +# elif defined(_MSC_VER) && (_MSC_VER >= 1300) +# define CV_NORETURN __declspec(noreturn) +# else +# define CV_NORETURN /* nothing by default */ +# endif +#endif + + +/****************************************************************************************\ +* CV_NODISCARD attribute * +* encourages the compiler to issue a warning if the return value is discarded (C++17) * +\****************************************************************************************/ +#ifndef CV_NODISCARD +# if defined(__GNUC__) +# define CV_NODISCARD __attribute__((__warn_unused_result__)) // at least available with GCC 3.4 +# elif defined(__clang__) && defined(__has_attribute) +# if __has_attribute(__warn_unused_result__) +# define CV_NODISCARD __attribute__((__warn_unused_result__)) +# endif +# endif +#endif +#ifndef CV_NODISCARD +# define CV_NODISCARD /* nothing by default */ +#endif + + +/****************************************************************************************\ +* C++ 11 * +\****************************************************************************************/ +#ifndef CV_CXX11 +# if __cplusplus >= 201103L || (defined(_MSC_VER) && _MSC_VER >= 1800) +# define CV_CXX11 1 +# endif +#else +# if CV_CXX11 == 0 +# undef CV_CXX11 +# endif +#endif +#ifndef CV_CXX11 +# error "OpenCV 4.x+ requires enabled C++11 support" +#endif + +#define CV_CXX_MOVE_SEMANTICS 1 +#define CV_CXX_MOVE(x) std::move(x) +#define CV_CXX_STD_ARRAY 1 +#include +#ifndef CV_OVERRIDE +# define CV_OVERRIDE override +#endif +#ifndef CV_FINAL +# define CV_FINAL final +#endif + +#ifndef CV_NOEXCEPT +# if __cplusplus >= 201103L || (defined(_MSC_VER) && _MSC_VER >= 1900/*MSVS 2015*/) +# define CV_NOEXCEPT noexcept +# endif +#endif +#ifndef CV_NOEXCEPT +# define CV_NOEXCEPT +#endif + +#ifndef CV_CONSTEXPR +# if __cplusplus >= 201103L || (defined(_MSC_VER) && _MSC_VER >= 1900/*MSVS 2015*/) +# define CV_CONSTEXPR constexpr +# endif +#endif +#ifndef CV_CONSTEXPR +# define CV_CONSTEXPR +#endif + +// Integer types portatibility +#ifdef OPENCV_STDINT_HEADER +#include OPENCV_STDINT_HEADER +#elif defined(__cplusplus) +#if defined(_MSC_VER) && _MSC_VER < 1600 /* MSVS 2010 */ +namespace cv { +typedef signed char int8_t; +typedef unsigned char uint8_t; +typedef signed short int16_t; +typedef unsigned short uint16_t; +typedef signed int int32_t; +typedef unsigned int uint32_t; +typedef signed __int64 int64_t; +typedef unsigned __int64 uint64_t; +} +#elif defined(_MSC_VER) || __cplusplus >= 201103L +#include +namespace cv { +using std::int8_t; +using std::uint8_t; +using std::int16_t; +using std::uint16_t; +using std::int32_t; +using std::uint32_t; +using std::int64_t; +using std::uint64_t; +} +#else +#include +namespace cv { +typedef ::int8_t int8_t; +typedef ::uint8_t uint8_t; +typedef ::int16_t int16_t; +typedef ::uint16_t uint16_t; +typedef ::int32_t int32_t; +typedef ::uint32_t uint32_t; +typedef ::int64_t int64_t; +typedef ::uint64_t uint64_t; +} +#endif +#else // pure C +#include +#endif + +#ifdef __cplusplus +namespace cv +{ + +class float16_t +{ +public: +#if CV_FP16_TYPE + + float16_t() : h(0) {} + explicit float16_t(float x) { h = (__fp16)x; } + operator float() const { return (float)h; } + static float16_t fromBits(ushort w) + { + Cv16suf u; + u.u = w; + float16_t result; + result.h = u.h; + return result; + } + static float16_t zero() + { + float16_t result; + result.h = (__fp16)0; + return result; + } + ushort bits() const + { + Cv16suf u; + u.h = h; + return u.u; + } +protected: + __fp16 h; + +#else + float16_t() : w(0) {} + explicit float16_t(float x) + { + #if CV_FP16 + __m128 v = _mm_load_ss(&x); + w = (ushort)_mm_cvtsi128_si32(_mm_cvtps_ph(v, 0)); + #else + Cv32suf in; + in.f = x; + unsigned sign = in.u & 0x80000000; + in.u ^= sign; + + if( in.u >= 0x47800000 ) + w = (ushort)(in.u > 0x7f800000 ? 0x7e00 : 0x7c00); + else + { + if (in.u < 0x38800000) + { + in.f += 0.5f; + w = (ushort)(in.u - 0x3f000000); + } + else + { + unsigned t = in.u + 0xc8000fff; + w = (ushort)((t + ((in.u >> 13) & 1)) >> 13); + } + } + + w = (ushort)(w | (sign >> 16)); + #endif + } + + operator float() const + { + #if CV_FP16 + float f; + _mm_store_ss(&f, _mm_cvtph_ps(_mm_cvtsi32_si128(w))); + return f; + #else + Cv32suf out; + + unsigned t = ((w & 0x7fff) << 13) + 0x38000000; + unsigned sign = (w & 0x8000) << 16; + unsigned e = w & 0x7c00; + + out.u = t + (1 << 23); + out.u = (e >= 0x7c00 ? t + 0x38000000 : + e == 0 ? (static_cast(out.f -= 6.103515625e-05f), out.u) : t) | sign; + return out.f; + #endif + } + + static float16_t fromBits(ushort b) + { + float16_t result; + result.w = b; + return result; + } + static float16_t zero() + { + float16_t result; + result.w = (ushort)0; + return result; + } + ushort bits() const { return w; } +protected: + ushort w; + +#endif +}; + +} +#endif + +//! @} + +#ifndef __cplusplus +#include "opencv2/core/fast_math.hpp" // define cvRound(double) +#endif + +#endif // OPENCV_CORE_CVDEF_H diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/cvstd.hpp b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/cvstd.hpp new file mode 100644 index 0000000..6ce9e4b --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/cvstd.hpp @@ -0,0 +1,190 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Copyright (C) 2013, OpenCV Foundation, all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef OPENCV_CORE_CVSTD_HPP +#define OPENCV_CORE_CVSTD_HPP + +#ifndef __cplusplus +# error cvstd.hpp header must be compiled as C++ +#endif + +#include "opencv2/core/cvdef.h" +#include +#include +#include + +#include + +// import useful primitives from stl +# include +# include +# include //for abs(int) +# include + +namespace cv +{ + static inline uchar abs(uchar a) { return a; } + static inline ushort abs(ushort a) { return a; } + static inline unsigned abs(unsigned a) { return a; } + static inline uint64 abs(uint64 a) { return a; } + + using std::min; + using std::max; + using std::abs; + using std::swap; + using std::sqrt; + using std::exp; + using std::pow; + using std::log; +} + +#include "cvstd_wrapper.hpp" + +namespace cv { + +//! @addtogroup core_utils +//! @{ + +//////////////////////////// memory management functions //////////////////////////// + +/** @brief Allocates an aligned memory buffer. + +The function allocates the buffer of the specified size and returns it. When the buffer size is 16 +bytes or more, the returned buffer is aligned to 16 bytes. +@param bufSize Allocated buffer size. + */ +CV_EXPORTS void* fastMalloc(size_t bufSize); + +/** @brief Deallocates a memory buffer. + +The function deallocates the buffer allocated with fastMalloc . If NULL pointer is passed, the +function does nothing. C version of the function clears the pointer *pptr* to avoid problems with +double memory deallocation. +@param ptr Pointer to the allocated buffer. + */ +CV_EXPORTS void fastFree(void* ptr); + +/*! + The STL-compliant memory Allocator based on cv::fastMalloc() and cv::fastFree() +*/ +template class Allocator +{ +public: + typedef _Tp value_type; + typedef value_type* pointer; + typedef const value_type* const_pointer; + typedef value_type& reference; + typedef const value_type& const_reference; + typedef size_t size_type; + typedef ptrdiff_t difference_type; + template class rebind { typedef Allocator other; }; + + explicit Allocator() {} + ~Allocator() {} + explicit Allocator(Allocator const&) {} + template + explicit Allocator(Allocator const&) {} + + // address + pointer address(reference r) { return &r; } + const_pointer address(const_reference r) { return &r; } + + pointer allocate(size_type count, const void* =0) { return reinterpret_cast(fastMalloc(count * sizeof (_Tp))); } + void deallocate(pointer p, size_type) { fastFree(p); } + + void construct(pointer p, const _Tp& v) { new(static_cast(p)) _Tp(v); } + void destroy(pointer p) { p->~_Tp(); } + + size_type max_size() const { return cv::max(static_cast<_Tp>(-1)/sizeof(_Tp), 1); } +}; + +//! @} core_utils + +//! @endcond + +//! @addtogroup core_basic +//! @{ + +//////////////////////////////// string class //////////////////////////////// + +class CV_EXPORTS FileNode; //for string constructor from FileNode + +typedef std::string String; + +#ifndef OPENCV_DISABLE_STRING_LOWER_UPPER_CONVERSIONS + +//! @cond IGNORED +namespace details { +// std::tolower is int->int +static inline char char_tolower(char ch) +{ + return (char)std::tolower((int)ch); +} +// std::toupper is int->int +static inline char char_toupper(char ch) +{ + return (char)std::toupper((int)ch); +} +} // namespace details +//! @endcond + +static inline std::string toLowerCase(const std::string& str) +{ + std::string result(str); + std::transform(result.begin(), result.end(), result.begin(), details::char_tolower); + return result; +} + +static inline std::string toUpperCase(const std::string& str) +{ + std::string result(str); + std::transform(result.begin(), result.end(), result.begin(), details::char_toupper); + return result; +} + +#endif // OPENCV_DISABLE_STRING_LOWER_UPPER_CONVERSIONS + +//! @} core_basic +} // cv + +#endif //OPENCV_CORE_CVSTD_HPP diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/cvstd.inl.hpp b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/cvstd.inl.hpp new file mode 100644 index 0000000..37ad1e6 --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/cvstd.inl.hpp @@ -0,0 +1,197 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Copyright (C) 2013, OpenCV Foundation, all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef OPENCV_CORE_CVSTDINL_HPP +#define OPENCV_CORE_CVSTDINL_HPP + +#include +#include +#include + +//! @cond IGNORED + +#ifdef _MSC_VER +#pragma warning( push ) +#pragma warning( disable: 4127 ) +#endif + +namespace cv +{ + +template class DataType< std::complex<_Tp> > +{ +public: + typedef std::complex<_Tp> value_type; + typedef value_type work_type; + typedef _Tp channel_type; + + enum { generic_type = 0, + depth = DataType::depth, + channels = 2, + fmt = DataType::fmt + ((channels - 1) << 8), + type = CV_MAKETYPE(depth, channels) }; + + typedef Vec vec_type; +}; + +static inline +std::ostream& operator << (std::ostream& out, Ptr fmtd) +{ + fmtd->reset(); + for(const char* str = fmtd->next(); str; str = fmtd->next()) + out << str; + return out; +} + +static inline +std::ostream& operator << (std::ostream& out, const Mat& mtx) +{ + return out << Formatter::get()->format(mtx); +} + +static inline +std::ostream& operator << (std::ostream& out, const UMat& m) +{ + return out << m.getMat(ACCESS_READ); +} + +template static inline +std::ostream& operator << (std::ostream& out, const Complex<_Tp>& c) +{ + return out << "(" << c.re << "," << c.im << ")"; +} + +template static inline +std::ostream& operator << (std::ostream& out, const std::vector >& vec) +{ + return out << Formatter::get()->format(Mat(vec)); +} + + +template static inline +std::ostream& operator << (std::ostream& out, const std::vector >& vec) +{ + return out << Formatter::get()->format(Mat(vec)); +} + + +template static inline +std::ostream& operator << (std::ostream& out, const Matx<_Tp, m, n>& matx) +{ + return out << Formatter::get()->format(Mat(matx)); +} + +template static inline +std::ostream& operator << (std::ostream& out, const Point_<_Tp>& p) +{ + out << "[" << p.x << ", " << p.y << "]"; + return out; +} + +template static inline +std::ostream& operator << (std::ostream& out, const Point3_<_Tp>& p) +{ + out << "[" << p.x << ", " << p.y << ", " << p.z << "]"; + return out; +} + +template static inline +std::ostream& operator << (std::ostream& out, const Vec<_Tp, n>& vec) +{ + out << "["; + if (cv::traits::Depth<_Tp>::value <= CV_32S) + { + for (int i = 0; i < n - 1; ++i) { + out << (int)vec[i] << ", "; + } + out << (int)vec[n-1] << "]"; + } + else + { + for (int i = 0; i < n - 1; ++i) { + out << vec[i] << ", "; + } + out << vec[n-1] << "]"; + } + + return out; +} + +template static inline +std::ostream& operator << (std::ostream& out, const Size_<_Tp>& size) +{ + return out << "[" << size.width << " x " << size.height << "]"; +} + +template static inline +std::ostream& operator << (std::ostream& out, const Rect_<_Tp>& rect) +{ + return out << "[" << rect.width << " x " << rect.height << " from (" << rect.x << ", " << rect.y << ")]"; +} + +static inline std::ostream& operator << (std::ostream& out, const MatSize& msize) +{ + int i, dims = msize.dims(); + for( i = 0; i < dims; i++ ) + { + out << msize[i]; + if( i < dims-1 ) + out << " x "; + } + return out; +} + +static inline std::ostream &operator<< (std::ostream &s, cv::Range &r) +{ + return s << "[" << r.start << " : " << r.end << ")"; +} + +} // cv + +#ifdef _MSC_VER +#pragma warning( pop ) +#endif + +//! @endcond + +#endif // OPENCV_CORE_CVSTDINL_HPP diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/cvstd_wrapper.hpp b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/cvstd_wrapper.hpp new file mode 100644 index 0000000..e2c2ea5 --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/cvstd_wrapper.hpp @@ -0,0 +1,154 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#ifndef OPENCV_CORE_CVSTD_WRAPPER_HPP +#define OPENCV_CORE_CVSTD_WRAPPER_HPP + +#include "opencv2/core/cvdef.h" + +#include +#include // std::shared_ptr +#include // std::enable_if + +namespace cv { + +using std::nullptr_t; + +//! @addtogroup core_basic +//! @{ + +#ifdef CV_DOXYGEN + +template using Ptr = std::shared_ptr<_Tp>; // In ideal world it should look like this, but we need some compatibility workarounds below + +template static inline +Ptr<_Tp> makePtr(const A1&... a1) { return std::make_shared<_Tp>(a1...); } + +#else // cv::Ptr with compatibility workarounds + +// It should be defined for C-API types only. +// C++ types should use regular "delete" operator. +template struct DefaultDeleter; +#if 0 +{ + void operator()(Y* p) const; +}; +#endif + +namespace sfinae { +template +struct has_parenthesis_operator +{ +private: + template + static CV_CONSTEXPR std::true_type check(typename std::is_same().operator()(std::declval()...))>::type, Ret>::type*); + + template static CV_CONSTEXPR std::false_type check(...); + + typedef decltype(check(0)) type; + +public: +#if __cplusplus >= 201103L || (defined(_MSC_VER) && _MSC_VER >= 1900/*MSVS 2015*/) + static CV_CONSTEXPR bool value = type::value; +#else + // support MSVS 2013 + static const int value = type::value; +#endif +}; +} // namespace sfinae + +template +struct has_custom_delete + : public std::false_type {}; + +// Force has_custom_delete to std::false_type when NVCC is compiling CUDA source files +#ifndef __CUDACC__ +template +struct has_custom_delete, void, T*>::value >::type > + : public std::true_type {}; +#endif + +template +struct Ptr : public std::shared_ptr +{ +#if 0 + using std::shared_ptr::shared_ptr; // GCC 5.x can't handle this +#else + inline Ptr() CV_NOEXCEPT : std::shared_ptr() {} + inline Ptr(nullptr_t) CV_NOEXCEPT : std::shared_ptr(nullptr) {} + template inline Ptr(Y* p, D d) : std::shared_ptr(p, d) {} + template inline Ptr(nullptr_t, D d) : std::shared_ptr(nullptr, d) {} + + template inline Ptr(const Ptr& r, T* ptr) CV_NOEXCEPT : std::shared_ptr(r, ptr) {} + + inline Ptr(const Ptr& o) CV_NOEXCEPT : std::shared_ptr(o) {} + inline Ptr(Ptr&& o) CV_NOEXCEPT : std::shared_ptr(std::move(o)) {} + + template inline Ptr(const Ptr& o) CV_NOEXCEPT : std::shared_ptr(o) {} + template inline Ptr(Ptr&& o) CV_NOEXCEPT : std::shared_ptr(std::move(o)) {} +#endif + inline Ptr(const std::shared_ptr& o) CV_NOEXCEPT : std::shared_ptr(o) {} + inline Ptr(std::shared_ptr&& o) CV_NOEXCEPT : std::shared_ptr(std::move(o)) {} + + // Overload with custom DefaultDeleter: Ptr(...) + template + inline Ptr(const std::true_type&, Y* ptr) : std::shared_ptr(ptr, DefaultDeleter()) {} + + // Overload without custom deleter: Ptr(...); + template + inline Ptr(const std::false_type&, Y* ptr) : std::shared_ptr(ptr) {} + + template + inline Ptr(Y* ptr) : Ptr(has_custom_delete(), ptr) {} + + // Overload with custom DefaultDeleter: Ptr(...) + template + inline void reset(const std::true_type&, Y* ptr) { std::shared_ptr::reset(ptr, DefaultDeleter()); } + + // Overload without custom deleter: Ptr(...); + template + inline void reset(const std::false_type&, Y* ptr) { std::shared_ptr::reset(ptr); } + + template + inline void reset(Y* ptr) { Ptr::reset(has_custom_delete(), ptr); } + + template + void reset(Y* ptr, Deleter d) { std::shared_ptr::reset(ptr, d); } + + void reset() CV_NOEXCEPT { std::shared_ptr::reset(); } + + Ptr& operator=(const Ptr& o) { std::shared_ptr::operator =(o); return *this; } + template inline Ptr& operator=(const Ptr& o) { std::shared_ptr::operator =(o); return *this; } + + T* operator->() const CV_NOEXCEPT { return std::shared_ptr::get();} + typename std::add_lvalue_reference::type operator*() const CV_NOEXCEPT { return *std::shared_ptr::get(); } + + // OpenCV 3.x methods (not a part of standard C++ library) + inline void release() { std::shared_ptr::reset(); } + inline operator T* () const { return std::shared_ptr::get(); } + inline bool empty() const { return std::shared_ptr::get() == nullptr; } + + template inline + Ptr staticCast() const CV_NOEXCEPT { return std::static_pointer_cast(*this); } + + template inline + Ptr constCast() const CV_NOEXCEPT { return std::const_pointer_cast(*this); } + + template inline + Ptr dynamicCast() const CV_NOEXCEPT { return std::dynamic_pointer_cast(*this); } +}; + +template static inline +Ptr<_Tp> makePtr(const A1&... a1) +{ + static_assert( !has_custom_delete<_Tp>::value, "Can't use this makePtr with custom DefaultDeleter"); + return (Ptr<_Tp>)std::make_shared<_Tp>(a1...); +} + +#endif // CV_DOXYGEN + +//! @} core_basic +} // cv + +#endif //OPENCV_CORE_CVSTD_WRAPPER_HPP diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/detail/async_promise.hpp b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/detail/async_promise.hpp new file mode 100644 index 0000000..6eb3fb5 --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/detail/async_promise.hpp @@ -0,0 +1,71 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#ifndef OPENCV_CORE_ASYNC_PROMISE_HPP +#define OPENCV_CORE_ASYNC_PROMISE_HPP + +#include "../async.hpp" + +#include "exception_ptr.hpp" + +namespace cv { + +/** @addtogroup core_async +@{ +*/ + + +/** @brief Provides result of asynchronous operations + +*/ +class CV_EXPORTS AsyncPromise +{ +public: + ~AsyncPromise() CV_NOEXCEPT; + AsyncPromise() CV_NOEXCEPT; + explicit AsyncPromise(const AsyncPromise& o) CV_NOEXCEPT; + AsyncPromise& operator=(const AsyncPromise& o) CV_NOEXCEPT; + void release() CV_NOEXCEPT; + + /** Returns associated AsyncArray + @note Can be called once + */ + AsyncArray getArrayResult(); + + /** Stores asynchronous result. + @param[in] value result + */ + void setValue(InputArray value); + + // TODO "move" setters + +#if CV__EXCEPTION_PTR + /** Stores exception. + @param[in] exception exception to be raised in AsyncArray + */ + void setException(std::exception_ptr exception); +#endif + + /** Stores exception. + @param[in] exception exception to be raised in AsyncArray + */ + void setException(const cv::Exception& exception); + +#ifdef CV_CXX11 + explicit AsyncPromise(AsyncPromise&& o) { p = o.p; o.p = NULL; } + AsyncPromise& operator=(AsyncPromise&& o) CV_NOEXCEPT { std::swap(p, o.p); return *this; } +#endif + + + // PImpl + typedef struct AsyncArray::Impl Impl; friend struct AsyncArray::Impl; + inline void* _getImpl() const CV_NOEXCEPT { return p; } +protected: + Impl* p; +}; + + +//! @} +} // namespace +#endif // OPENCV_CORE_ASYNC_PROMISE_HPP diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/detail/exception_ptr.hpp b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/detail/exception_ptr.hpp new file mode 100644 index 0000000..d98ffc4 --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/detail/exception_ptr.hpp @@ -0,0 +1,27 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#ifndef OPENCV_CORE_DETAILS_EXCEPTION_PTR_H +#define OPENCV_CORE_DETAILS_EXCEPTION_PTR_H + +#ifndef CV__EXCEPTION_PTR +# if defined(__ANDROID__) && defined(ATOMIC_INT_LOCK_FREE) && ATOMIC_INT_LOCK_FREE < 2 +# define CV__EXCEPTION_PTR 0 // Not supported, details: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58938 +# elif defined(CV_CXX11) +# define CV__EXCEPTION_PTR 1 +# elif defined(_MSC_VER) +# define CV__EXCEPTION_PTR (_MSC_VER >= 1600) +# elif defined(__clang__) +# define CV__EXCEPTION_PTR 0 // C++11 only (see above) +# elif defined(__GNUC__) && defined(__GXX_EXPERIMENTAL_CXX0X__) +# define CV__EXCEPTION_PTR (__GXX_EXPERIMENTAL_CXX0X__ > 0) +# endif +#endif +#ifndef CV__EXCEPTION_PTR +# define CV__EXCEPTION_PTR 0 +#elif CV__EXCEPTION_PTR +# include // std::exception_ptr +#endif + +#endif // OPENCV_CORE_DETAILS_EXCEPTION_PTR_H diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/directx.hpp b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/directx.hpp new file mode 100644 index 0000000..056a85a --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/directx.hpp @@ -0,0 +1,184 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2013, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors as is and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the copyright holders or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef OPENCV_CORE_DIRECTX_HPP +#define OPENCV_CORE_DIRECTX_HPP + +#include "mat.hpp" +#include "ocl.hpp" + +#if !defined(__d3d11_h__) +struct ID3D11Device; +struct ID3D11Texture2D; +#endif + +#if !defined(__d3d10_h__) +struct ID3D10Device; +struct ID3D10Texture2D; +#endif + +#if !defined(_D3D9_H_) +struct IDirect3DDevice9; +struct IDirect3DDevice9Ex; +struct IDirect3DSurface9; +#endif + + +namespace cv { namespace directx { + +namespace ocl { +using namespace cv::ocl; + +//! @addtogroup core_directx +// This section describes OpenCL and DirectX interoperability. +// +// To enable DirectX support, configure OpenCV using CMake with WITH_DIRECTX=ON . Note, DirectX is +// supported only on Windows. +// +// To use OpenCL functionality you should first initialize OpenCL context from DirectX resource. +// +//! @{ + +// TODO static functions in the Context class +//! @brief Creates OpenCL context from D3D11 device +// +//! @param pD3D11Device - pointer to D3D11 device +//! @return Returns reference to OpenCL Context +CV_EXPORTS Context& initializeContextFromD3D11Device(ID3D11Device* pD3D11Device); + +//! @brief Creates OpenCL context from D3D10 device +// +//! @param pD3D10Device - pointer to D3D10 device +//! @return Returns reference to OpenCL Context +CV_EXPORTS Context& initializeContextFromD3D10Device(ID3D10Device* pD3D10Device); + +//! @brief Creates OpenCL context from Direct3DDevice9Ex device +// +//! @param pDirect3DDevice9Ex - pointer to Direct3DDevice9Ex device +//! @return Returns reference to OpenCL Context +CV_EXPORTS Context& initializeContextFromDirect3DDevice9Ex(IDirect3DDevice9Ex* pDirect3DDevice9Ex); + +//! @brief Creates OpenCL context from Direct3DDevice9 device +// +//! @param pDirect3DDevice9 - pointer to Direct3Device9 device +//! @return Returns reference to OpenCL Context +CV_EXPORTS Context& initializeContextFromDirect3DDevice9(IDirect3DDevice9* pDirect3DDevice9); + +//! @} + +} // namespace cv::directx::ocl + +//! @addtogroup core_directx +//! @{ + +//! @brief Converts InputArray to ID3D11Texture2D. If destination texture format is DXGI_FORMAT_NV12 then +//! input UMat expected to be in BGR format and data will be downsampled and color-converted to NV12. +// +//! @note Note: Destination texture must be allocated by application. Function does memory copy from src to +//! pD3D11Texture2D +// +//! @param src - source InputArray +//! @param pD3D11Texture2D - destination D3D11 texture +CV_EXPORTS void convertToD3D11Texture2D(InputArray src, ID3D11Texture2D* pD3D11Texture2D); + +//! @brief Converts ID3D11Texture2D to OutputArray. If input texture format is DXGI_FORMAT_NV12 then +//! data will be upsampled and color-converted to BGR format. +// +//! @note Note: Destination matrix will be re-allocated if it has not enough memory to match texture size. +//! function does memory copy from pD3D11Texture2D to dst +// +//! @param pD3D11Texture2D - source D3D11 texture +//! @param dst - destination OutputArray +CV_EXPORTS void convertFromD3D11Texture2D(ID3D11Texture2D* pD3D11Texture2D, OutputArray dst); + +//! @brief Converts InputArray to ID3D10Texture2D +// +//! @note Note: function does memory copy from src to +//! pD3D10Texture2D +// +//! @param src - source InputArray +//! @param pD3D10Texture2D - destination D3D10 texture +CV_EXPORTS void convertToD3D10Texture2D(InputArray src, ID3D10Texture2D* pD3D10Texture2D); + +//! @brief Converts ID3D10Texture2D to OutputArray +// +//! @note Note: function does memory copy from pD3D10Texture2D +//! to dst +// +//! @param pD3D10Texture2D - source D3D10 texture +//! @param dst - destination OutputArray +CV_EXPORTS void convertFromD3D10Texture2D(ID3D10Texture2D* pD3D10Texture2D, OutputArray dst); + +//! @brief Converts InputArray to IDirect3DSurface9 +// +//! @note Note: function does memory copy from src to +//! pDirect3DSurface9 +// +//! @param src - source InputArray +//! @param pDirect3DSurface9 - destination D3D10 texture +//! @param surfaceSharedHandle - shared handle +CV_EXPORTS void convertToDirect3DSurface9(InputArray src, IDirect3DSurface9* pDirect3DSurface9, void* surfaceSharedHandle = NULL); + +//! @brief Converts IDirect3DSurface9 to OutputArray +// +//! @note Note: function does memory copy from pDirect3DSurface9 +//! to dst +// +//! @param pDirect3DSurface9 - source D3D10 texture +//! @param dst - destination OutputArray +//! @param surfaceSharedHandle - shared handle +CV_EXPORTS void convertFromDirect3DSurface9(IDirect3DSurface9* pDirect3DSurface9, OutputArray dst, void* surfaceSharedHandle = NULL); + +//! @brief Get OpenCV type from DirectX type +//! @param iDXGI_FORMAT - enum DXGI_FORMAT for D3D10/D3D11 +//! @return OpenCV type or -1 if there is no equivalent +CV_EXPORTS int getTypeFromDXGI_FORMAT(const int iDXGI_FORMAT); // enum DXGI_FORMAT for D3D10/D3D11 + +//! @brief Get OpenCV type from DirectX type +//! @param iD3DFORMAT - enum D3DTYPE for D3D9 +//! @return OpenCV type or -1 if there is no equivalent +CV_EXPORTS int getTypeFromD3DFORMAT(const int iD3DFORMAT); // enum D3DTYPE for D3D9 + +//! @} + +} } // namespace cv::directx + +#endif // OPENCV_CORE_DIRECTX_HPP diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/eigen.hpp b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/eigen.hpp new file mode 100644 index 0000000..51f4147 --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/eigen.hpp @@ -0,0 +1,402 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Copyright (C) 2013, OpenCV Foundation, all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + + +#ifndef OPENCV_CORE_EIGEN_HPP +#define OPENCV_CORE_EIGEN_HPP + +#ifndef EIGEN_WORLD_VERSION +#error "Wrong usage of OpenCV's Eigen utility header. Include Eigen's headers first. See https://github.com/opencv/opencv/issues/17366" +#endif + +#include "opencv2/core.hpp" + +#if defined _MSC_VER && _MSC_VER >= 1200 +#define NOMINMAX // fix https://github.com/opencv/opencv/issues/17548 +#pragma warning( disable: 4714 ) //__forceinline is not inlined +#pragma warning( disable: 4127 ) //conditional expression is constant +#pragma warning( disable: 4244 ) //conversion from '__int64' to 'int', possible loss of data +#endif + +#if !defined(OPENCV_DISABLE_EIGEN_TENSOR_SUPPORT) +#if EIGEN_WORLD_VERSION == 3 && EIGEN_MAJOR_VERSION >= 3 \ + && defined(CV_CXX11) && defined(CV_CXX_STD_ARRAY) +#include +#define OPENCV_EIGEN_TENSOR_SUPPORT 1 +#endif // EIGEN_WORLD_VERSION == 3 && EIGEN_MAJOR_VERSION >= 3 +#endif // !defined(OPENCV_DISABLE_EIGEN_TENSOR_SUPPORT) + +namespace cv +{ + +/** @addtogroup core_eigen +These functions are provided for OpenCV-Eigen interoperability. They convert `Mat` +objects to corresponding `Eigen::Matrix` objects and vice-versa. Consult the [Eigen +documentation](https://eigen.tuxfamily.org/dox/group__TutorialMatrixClass.html) for +information about the `Matrix` template type. + +@note Using these functions requires the `Eigen/Dense` or similar header to be +included before this header. +*/ +//! @{ + +#if defined(OPENCV_EIGEN_TENSOR_SUPPORT) || defined(CV_DOXYGEN) +/** @brief Converts an Eigen::Tensor to a cv::Mat. + +The method converts an Eigen::Tensor with shape (H x W x C) to a cv::Mat where: + H = number of rows + W = number of columns + C = number of channels + +Usage: +\code +Eigen::Tensor a_tensor(...); +// populate tensor with values +Mat a_mat; +eigen2cv(a_tensor, a_mat); +\endcode +*/ +template static inline +void eigen2cv( const Eigen::Tensor<_Tp, 3, _layout> &src, OutputArray dst ) +{ + if( !(_layout & Eigen::RowMajorBit) ) + { + const std::array shuffle{2, 1, 0}; + Eigen::Tensor<_Tp, 3, !_layout> row_major_tensor = src.swap_layout().shuffle(shuffle); + Mat _src(src.dimension(0), src.dimension(1), CV_MAKETYPE(DataType<_Tp>::type, src.dimension(2)), row_major_tensor.data()); + _src.copyTo(dst); + } + else + { + Mat _src(src.dimension(0), src.dimension(1), CV_MAKETYPE(DataType<_Tp>::type, src.dimension(2)), (void *)src.data()); + _src.copyTo(dst); + } +} + +/** @brief Converts a cv::Mat to an Eigen::Tensor. + +The method converts a cv::Mat to an Eigen Tensor with shape (H x W x C) where: + H = number of rows + W = number of columns + C = number of channels + +Usage: +\code +Mat a_mat(...); +// populate Mat with values +Eigen::Tensor a_tensor(...); +cv2eigen(a_mat, a_tensor); +\endcode +*/ +template static inline +void cv2eigen( const Mat &src, Eigen::Tensor<_Tp, 3, _layout> &dst ) +{ + if( !(_layout & Eigen::RowMajorBit) ) + { + Eigen::Tensor<_Tp, 3, !_layout> row_major_tensor(src.rows, src.cols, src.channels()); + Mat _dst(src.rows, src.cols, CV_MAKETYPE(DataType<_Tp>::type, src.channels()), row_major_tensor.data()); + if (src.type() == _dst.type()) + src.copyTo(_dst); + else + src.convertTo(_dst, _dst.type()); + const std::array shuffle{2, 1, 0}; + dst = row_major_tensor.swap_layout().shuffle(shuffle); + } + else + { + dst.resize(src.rows, src.cols, src.channels()); + Mat _dst(src.rows, src.cols, CV_MAKETYPE(DataType<_Tp>::type, src.channels()), dst.data()); + if (src.type() == _dst.type()) + src.copyTo(_dst); + else + src.convertTo(_dst, _dst.type()); + } +} + +/** @brief Maps cv::Mat data to an Eigen::TensorMap. + +The method wraps an existing Mat data array with an Eigen TensorMap of shape (H x W x C) where: + H = number of rows + W = number of columns + C = number of channels + +Explicit instantiation of the return type is required. + +@note Caller should be aware of the lifetime of the cv::Mat instance and take appropriate safety measures. +The cv::Mat instance will retain ownership of the data and the Eigen::TensorMap will lose access when the cv::Mat data is deallocated. + +The example below initializes a cv::Mat and produces an Eigen::TensorMap: +\code +float arr[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}; +Mat a_mat(2, 2, CV_32FC3, arr); +Eigen::TensorMap> a_tensormap = cv2eigen_tensormap(a_mat); +\endcode +*/ +template static inline +Eigen::TensorMap> cv2eigen_tensormap(InputArray src) +{ + Mat mat = src.getMat(); + CV_CheckTypeEQ(mat.type(), CV_MAKETYPE(traits::Type<_Tp>::value, mat.channels()), ""); + return Eigen::TensorMap>((_Tp *)mat.data, mat.rows, mat.cols, mat.channels()); +} +#endif // OPENCV_EIGEN_TENSOR_SUPPORT + +template static inline +void eigen2cv( const Eigen::Matrix<_Tp, _rows, _cols, _options, _maxRows, _maxCols>& src, OutputArray dst ) +{ + if( !(src.Flags & Eigen::RowMajorBit) ) + { + Mat _src(src.cols(), src.rows(), traits::Type<_Tp>::value, + (void*)src.data(), src.outerStride()*sizeof(_Tp)); + transpose(_src, dst); + } + else + { + Mat _src(src.rows(), src.cols(), traits::Type<_Tp>::value, + (void*)src.data(), src.outerStride()*sizeof(_Tp)); + _src.copyTo(dst); + } +} + +// Matx case +template static inline +void eigen2cv( const Eigen::Matrix<_Tp, _rows, _cols, _options, _maxRows, _maxCols>& src, + Matx<_Tp, _rows, _cols>& dst ) +{ + if( !(src.Flags & Eigen::RowMajorBit) ) + { + dst = Matx<_Tp, _cols, _rows>(static_cast(src.data())).t(); + } + else + { + dst = Matx<_Tp, _rows, _cols>(static_cast(src.data())); + } +} + +template static inline +void cv2eigen( const Mat& src, + Eigen::Matrix<_Tp, _rows, _cols, _options, _maxRows, _maxCols>& dst ) +{ + CV_DbgAssert(src.rows == _rows && src.cols == _cols); + if( !(dst.Flags & Eigen::RowMajorBit) ) + { + const Mat _dst(src.cols, src.rows, traits::Type<_Tp>::value, + dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp))); + if( src.type() == _dst.type() ) + transpose(src, _dst); + else if( src.cols == src.rows ) + { + src.convertTo(_dst, _dst.type()); + transpose(_dst, _dst); + } + else + Mat(src.t()).convertTo(_dst, _dst.type()); + } + else + { + const Mat _dst(src.rows, src.cols, traits::Type<_Tp>::value, + dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp))); + src.convertTo(_dst, _dst.type()); + } +} + +// Matx case +template static inline +void cv2eigen( const Matx<_Tp, _rows, _cols>& src, + Eigen::Matrix<_Tp, _rows, _cols, _options, _maxRows, _maxCols>& dst ) +{ + if( !(dst.Flags & Eigen::RowMajorBit) ) + { + const Mat _dst(_cols, _rows, traits::Type<_Tp>::value, + dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp))); + transpose(src, _dst); + } + else + { + const Mat _dst(_rows, _cols, traits::Type<_Tp>::value, + dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp))); + Mat(src).copyTo(_dst); + } +} + +template static inline +void cv2eigen( const Mat& src, + Eigen::Matrix<_Tp, Eigen::Dynamic, Eigen::Dynamic>& dst ) +{ + dst.resize(src.rows, src.cols); + if( !(dst.Flags & Eigen::RowMajorBit) ) + { + const Mat _dst(src.cols, src.rows, traits::Type<_Tp>::value, + dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp))); + if( src.type() == _dst.type() ) + transpose(src, _dst); + else if( src.cols == src.rows ) + { + src.convertTo(_dst, _dst.type()); + transpose(_dst, _dst); + } + else + Mat(src.t()).convertTo(_dst, _dst.type()); + } + else + { + const Mat _dst(src.rows, src.cols, traits::Type<_Tp>::value, + dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp))); + src.convertTo(_dst, _dst.type()); + } +} + +// Matx case +template static inline +void cv2eigen( const Matx<_Tp, _rows, _cols>& src, + Eigen::Matrix<_Tp, Eigen::Dynamic, Eigen::Dynamic>& dst ) +{ + dst.resize(_rows, _cols); + if( !(dst.Flags & Eigen::RowMajorBit) ) + { + const Mat _dst(_cols, _rows, traits::Type<_Tp>::value, + dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp))); + transpose(src, _dst); + } + else + { + const Mat _dst(_rows, _cols, traits::Type<_Tp>::value, + dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp))); + Mat(src).copyTo(_dst); + } +} + +template static inline +void cv2eigen( const Mat& src, + Eigen::Matrix<_Tp, Eigen::Dynamic, 1>& dst ) +{ + CV_Assert(src.cols == 1); + dst.resize(src.rows); + + if( !(dst.Flags & Eigen::RowMajorBit) ) + { + const Mat _dst(src.cols, src.rows, traits::Type<_Tp>::value, + dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp))); + if( src.type() == _dst.type() ) + transpose(src, _dst); + else + Mat(src.t()).convertTo(_dst, _dst.type()); + } + else + { + const Mat _dst(src.rows, src.cols, traits::Type<_Tp>::value, + dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp))); + src.convertTo(_dst, _dst.type()); + } +} + +// Matx case +template static inline +void cv2eigen( const Matx<_Tp, _rows, 1>& src, + Eigen::Matrix<_Tp, Eigen::Dynamic, 1>& dst ) +{ + dst.resize(_rows); + + if( !(dst.Flags & Eigen::RowMajorBit) ) + { + const Mat _dst(1, _rows, traits::Type<_Tp>::value, + dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp))); + transpose(src, _dst); + } + else + { + const Mat _dst(_rows, 1, traits::Type<_Tp>::value, + dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp))); + src.copyTo(_dst); + } +} + + +template static inline +void cv2eigen( const Mat& src, + Eigen::Matrix<_Tp, 1, Eigen::Dynamic>& dst ) +{ + CV_Assert(src.rows == 1); + dst.resize(src.cols); + if( !(dst.Flags & Eigen::RowMajorBit) ) + { + const Mat _dst(src.cols, src.rows, traits::Type<_Tp>::value, + dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp))); + if( src.type() == _dst.type() ) + transpose(src, _dst); + else + Mat(src.t()).convertTo(_dst, _dst.type()); + } + else + { + const Mat _dst(src.rows, src.cols, traits::Type<_Tp>::value, + dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp))); + src.convertTo(_dst, _dst.type()); + } +} + +//Matx +template static inline +void cv2eigen( const Matx<_Tp, 1, _cols>& src, + Eigen::Matrix<_Tp, 1, Eigen::Dynamic>& dst ) +{ + dst.resize(_cols); + if( !(dst.Flags & Eigen::RowMajorBit) ) + { + const Mat _dst(_cols, 1, traits::Type<_Tp>::value, + dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp))); + transpose(src, _dst); + } + else + { + const Mat _dst(1, _cols, traits::Type<_Tp>::value, + dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp))); + Mat(src).copyTo(_dst); + } +} + +//! @} + +} // cv + +#endif diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/fast_math.hpp b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/fast_math.hpp new file mode 100644 index 0000000..0f53cf5 --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/fast_math.hpp @@ -0,0 +1,408 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Copyright (C) 2013, OpenCV Foundation, all rights reserved. +// Copyright (C) 2015, Itseez Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef OPENCV_CORE_FAST_MATH_HPP +#define OPENCV_CORE_FAST_MATH_HPP + +#include "opencv2/core/cvdef.h" + +//! @addtogroup core_utils +//! @{ + +/****************************************************************************************\ +* fast math * +\****************************************************************************************/ + +#ifdef __cplusplus +# include +#else +# ifdef __BORLANDC__ +# include +# else +# include +# endif +#endif + +#if defined(__CUDACC__) + // nothing, intrinsics/asm code is not supported +#else + #if ((defined _MSC_VER && defined _M_X64) \ + || (defined __GNUC__ && defined __x86_64__ && defined __SSE2__)) \ + && !defined(OPENCV_SKIP_INCLUDE_EMMINTRIN_H) + #include + #endif + + #if defined __PPC64__ && defined __GNUC__ && defined _ARCH_PWR8 \ + && !defined(OPENCV_SKIP_INCLUDE_ALTIVEC_H) + #include + #endif + + #if defined(CV_INLINE_ROUND_FLT) + // user-specified version + // CV_INLINE_ROUND_DBL should be defined too + #elif defined __GNUC__ && defined __arm__ && (defined __ARM_PCS_VFP || defined __ARM_VFPV3__ || defined __ARM_NEON__) && !defined __SOFTFP__ + // 1. general scheme + #define ARM_ROUND(_value, _asm_string) \ + int res; \ + float temp; \ + CV_UNUSED(temp); \ + __asm__(_asm_string : [res] "=r" (res), [temp] "=w" (temp) : [value] "w" (_value)); \ + return res + // 2. version for double + #ifdef __clang__ + #define CV_INLINE_ROUND_DBL(value) ARM_ROUND(value, "vcvtr.s32.f64 %[temp], %[value] \n vmov %[res], %[temp]") + #else + #define CV_INLINE_ROUND_DBL(value) ARM_ROUND(value, "vcvtr.s32.f64 %[temp], %P[value] \n vmov %[res], %[temp]") + #endif + // 3. version for float + #define CV_INLINE_ROUND_FLT(value) ARM_ROUND(value, "vcvtr.s32.f32 %[temp], %[value]\n vmov %[res], %[temp]") + #elif defined __PPC64__ && defined __GNUC__ && defined _ARCH_PWR8 + // P8 and newer machines can convert fp32/64 to int quickly. + #define CV_INLINE_ROUND_DBL(value) \ + int out; \ + double temp; \ + __asm__( "fctiw %[temp],%[in]\n\tmfvsrwz %[out],%[temp]\n\t" : [out] "=r" (out), [temp] "=d" (temp) : [in] "d" ((double)(value)) : ); \ + return out; + + // FP32 also works with FP64 routine above + #define CV_INLINE_ROUND_FLT(value) CV_INLINE_ROUND_DBL(value) + #endif + + #ifdef CV_INLINE_ISINF_FLT + // user-specified version + // CV_INLINE_ISINF_DBL should be defined too + #elif defined __PPC64__ && defined _ARCH_PWR9 && defined(scalar_test_data_class) + #define CV_INLINE_ISINF_DBL(value) return scalar_test_data_class(value, 0x30); + #define CV_INLINE_ISINF_FLT(value) CV_INLINE_ISINF_DBL(value) + #endif + + #ifdef CV_INLINE_ISNAN_FLT + // user-specified version + // CV_INLINE_ISNAN_DBL should be defined too + #elif defined __PPC64__ && defined _ARCH_PWR9 && defined(scalar_test_data_class) + #define CV_INLINE_ISNAN_DBL(value) return scalar_test_data_class(value, 0x40); + #define CV_INLINE_ISNAN_FLT(value) CV_INLINE_ISNAN_DBL(value) + #endif + + #if !defined(OPENCV_USE_FASTMATH_BUILTINS) \ + && ( \ + defined(__x86_64__) || defined(__i686__) \ + || defined(__arm__) \ + || defined(__PPC64__) \ + ) + /* Let builtin C math functions when available. Dedicated hardware is available to + round and convert FP values. */ + #define OPENCV_USE_FASTMATH_BUILTINS 1 + #endif + + /* Enable builtin math functions if possible, desired, and available. + Note, not all math functions inline equally. E.g lrint will not inline + without the -fno-math-errno option. */ + #if defined(CV_ICC) + // nothing + #elif defined(OPENCV_USE_FASTMATH_BUILTINS) && OPENCV_USE_FASTMATH_BUILTINS + #if defined(__clang__) + #define CV__FASTMATH_ENABLE_CLANG_MATH_BUILTINS + #if !defined(CV_INLINE_ISNAN_DBL) && __has_builtin(__builtin_isnan) + #define CV_INLINE_ISNAN_DBL(value) return __builtin_isnan(value); + #endif + #if !defined(CV_INLINE_ISNAN_FLT) && __has_builtin(__builtin_isnan) + #define CV_INLINE_ISNAN_FLT(value) return __builtin_isnan(value); + #endif + #if !defined(CV_INLINE_ISINF_DBL) && __has_builtin(__builtin_isinf) + #define CV_INLINE_ISINF_DBL(value) return __builtin_isinf(value); + #endif + #if !defined(CV_INLINE_ISINF_FLT) && __has_builtin(__builtin_isinf) + #define CV_INLINE_ISINF_FLT(value) return __builtin_isinf(value); + #endif + #elif defined(__GNUC__) + #define CV__FASTMATH_ENABLE_GCC_MATH_BUILTINS + #if !defined(CV_INLINE_ISNAN_DBL) + #define CV_INLINE_ISNAN_DBL(value) return __builtin_isnan(value); + #endif + #if !defined(CV_INLINE_ISNAN_FLT) + #define CV_INLINE_ISNAN_FLT(value) return __builtin_isnanf(value); + #endif + #if !defined(CV_INLINE_ISINF_DBL) + #define CV_INLINE_ISINF_DBL(value) return __builtin_isinf(value); + #endif + #if !defined(CV_INLINE_ISINF_FLT) + #define CV_INLINE_ISINF_FLT(value) return __builtin_isinff(value); + #endif + #elif defined(_MSC_VER) + #if !defined(CV_INLINE_ISNAN_DBL) + #define CV_INLINE_ISNAN_DBL(value) return isnan(value); + #endif + #if !defined(CV_INLINE_ISNAN_FLT) + #define CV_INLINE_ISNAN_FLT(value) return isnan(value); + #endif + #if !defined(CV_INLINE_ISINF_DBL) + #define CV_INLINE_ISINF_DBL(value) return isinf(value); + #endif + #if !defined(CV_INLINE_ISINF_FLT) + #define CV_INLINE_ISINF_FLT(value) return isinf(value); + #endif + #endif + #endif + +#endif // defined(__CUDACC__) + +/** @brief Rounds floating-point number to the nearest integer + + @param value floating-point number. If the value is outside of INT_MIN ... INT_MAX range, the + result is not defined. + */ +CV_INLINE int +cvRound( double value ) +{ +#if defined CV_INLINE_ROUND_DBL + CV_INLINE_ROUND_DBL(value); +#elif ((defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __x86_64__ \ + && defined __SSE2__ && !defined __APPLE__) || CV_SSE2) \ + && !defined(__CUDACC__) + __m128d t = _mm_set_sd( value ); + return _mm_cvtsd_si32(t); +#elif defined _MSC_VER && defined _M_IX86 + int t; + __asm + { + fld value; + fistp t; + } + return t; +#elif defined CV_ICC || defined __GNUC__ + return (int)(lrint(value)); +#else + /* it's ok if round does not comply with IEEE754 standard; + the tests should allow +/-1 difference when the tested functions use round */ + return (int)(value + (value >= 0 ? 0.5 : -0.5)); +#endif +} + + +/** @brief Rounds floating-point number to the nearest integer not larger than the original. + + The function computes an integer i such that: + \f[i \le \texttt{value} < i+1\f] + @param value floating-point number. If the value is outside of INT_MIN ... INT_MAX range, the + result is not defined. + */ +CV_INLINE int cvFloor( double value ) +{ +#if (defined CV__FASTMATH_ENABLE_GCC_MATH_BUILTINS || defined CV__FASTMATH_ENABLE_CLANG_MATH_BUILTINS) \ + && ( \ + defined(__PPC64__) \ + ) + return __builtin_floor(value); +#else + int i = (int)value; + return i - (i > value); +#endif +} + +/** @brief Rounds floating-point number to the nearest integer not smaller than the original. + + The function computes an integer i such that: + \f[i \le \texttt{value} < i+1\f] + @param value floating-point number. If the value is outside of INT_MIN ... INT_MAX range, the + result is not defined. + */ +CV_INLINE int cvCeil( double value ) +{ +#if (defined CV__FASTMATH_ENABLE_GCC_MATH_BUILTINS || defined CV__FASTMATH_ENABLE_CLANG_MATH_BUILTINS) \ + && ( \ + defined(__PPC64__) \ + ) + return __builtin_ceil(value); +#else + int i = (int)value; + return i + (i < value); +#endif +} + +/** @brief Determines if the argument is Not A Number. + + @param value The input floating-point value + + The function returns 1 if the argument is Not A Number (as defined by IEEE754 standard), 0 + otherwise. */ +CV_INLINE int cvIsNaN( double value ) +{ +#if defined CV_INLINE_ISNAN_DBL + CV_INLINE_ISNAN_DBL(value); +#else + Cv64suf ieee754; + ieee754.f = value; + return ((unsigned)(ieee754.u >> 32) & 0x7fffffff) + + ((unsigned)ieee754.u != 0) > 0x7ff00000; +#endif +} + +/** @brief Determines if the argument is Infinity. + + @param value The input floating-point value + + The function returns 1 if the argument is a plus or minus infinity (as defined by IEEE754 standard) + and 0 otherwise. */ +CV_INLINE int cvIsInf( double value ) +{ +#if defined CV_INLINE_ISINF_DBL + CV_INLINE_ISINF_DBL(value); +#elif defined(__x86_64__) || defined(_M_X64) || defined(__aarch64__) || defined(_M_ARM64) || defined(__PPC64__) + Cv64suf ieee754; + ieee754.f = value; + return (ieee754.u & 0x7fffffff00000000) == + 0x7ff0000000000000; +#else + Cv64suf ieee754; + ieee754.f = value; + return ((unsigned)(ieee754.u >> 32) & 0x7fffffff) == 0x7ff00000 && + (unsigned)ieee754.u == 0; +#endif +} + +#ifdef __cplusplus + +/** @overload */ +CV_INLINE int cvRound(float value) +{ +#if defined CV_INLINE_ROUND_FLT + CV_INLINE_ROUND_FLT(value); +#elif ((defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __x86_64__ \ + && defined __SSE2__ && !defined __APPLE__) || CV_SSE2) \ + && !defined(__CUDACC__) + __m128 t = _mm_set_ss( value ); + return _mm_cvtss_si32(t); +#elif defined _MSC_VER && defined _M_IX86 + int t; + __asm + { + fld value; + fistp t; + } + return t; +#elif defined CV_ICC || defined __GNUC__ + return (int)(lrintf(value)); +#else + /* it's ok if round does not comply with IEEE754 standard; + the tests should allow +/-1 difference when the tested functions use round */ + return (int)(value + (value >= 0 ? 0.5f : -0.5f)); +#endif +} + +/** @overload */ +CV_INLINE int cvRound( int value ) +{ + return value; +} + +/** @overload */ +CV_INLINE int cvFloor( float value ) +{ +#if (defined CV__FASTMATH_ENABLE_GCC_MATH_BUILTINS || defined CV__FASTMATH_ENABLE_CLANG_MATH_BUILTINS) \ + && ( \ + defined(__PPC64__) \ + ) + return __builtin_floorf(value); +#else + int i = (int)value; + return i - (i > value); +#endif +} + +/** @overload */ +CV_INLINE int cvFloor( int value ) +{ + return value; +} + +/** @overload */ +CV_INLINE int cvCeil( float value ) +{ +#if (defined CV__FASTMATH_ENABLE_GCC_MATH_BUILTINS || defined CV__FASTMATH_ENABLE_CLANG_MATH_BUILTINS) \ + && ( \ + defined(__PPC64__) \ + ) + return __builtin_ceilf(value); +#else + int i = (int)value; + return i + (i < value); +#endif +} + +/** @overload */ +CV_INLINE int cvCeil( int value ) +{ + return value; +} + +/** @overload */ +CV_INLINE int cvIsNaN( float value ) +{ +#if defined CV_INLINE_ISNAN_FLT + CV_INLINE_ISNAN_FLT(value); +#else + Cv32suf ieee754; + ieee754.f = value; + return (ieee754.u & 0x7fffffff) > 0x7f800000; +#endif +} + +/** @overload */ +CV_INLINE int cvIsInf( float value ) +{ +#if defined CV_INLINE_ISINF_FLT + CV_INLINE_ISINF_FLT(value); +#else + Cv32suf ieee754; + ieee754.f = value; + return (ieee754.u & 0x7fffffff) == 0x7f800000; +#endif +} + +#endif // __cplusplus + +//! @} core_utils + +#endif diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/hal/hal.hpp b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/hal/hal.hpp new file mode 100644 index 0000000..0d68078 --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/hal/hal.hpp @@ -0,0 +1,256 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Copyright (C) 2013, OpenCV Foundation, all rights reserved. +// Copyright (C) 2015, Itseez Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef OPENCV_HAL_HPP +#define OPENCV_HAL_HPP + +#include "opencv2/core/cvdef.h" +#include "opencv2/core/cvstd.hpp" +#include "opencv2/core/hal/interface.h" + +namespace cv { namespace hal { + +//! @addtogroup core_hal_functions +//! @{ + +CV_EXPORTS int normHamming(const uchar* a, int n); +CV_EXPORTS int normHamming(const uchar* a, const uchar* b, int n); + +CV_EXPORTS int normHamming(const uchar* a, int n, int cellSize); +CV_EXPORTS int normHamming(const uchar* a, const uchar* b, int n, int cellSize); + +CV_EXPORTS int LU32f(float* A, size_t astep, int m, float* b, size_t bstep, int n); +CV_EXPORTS int LU64f(double* A, size_t astep, int m, double* b, size_t bstep, int n); +CV_EXPORTS bool Cholesky32f(float* A, size_t astep, int m, float* b, size_t bstep, int n); +CV_EXPORTS bool Cholesky64f(double* A, size_t astep, int m, double* b, size_t bstep, int n); +CV_EXPORTS void SVD32f(float* At, size_t astep, float* W, float* U, size_t ustep, float* Vt, size_t vstep, int m, int n, int flags); +CV_EXPORTS void SVD64f(double* At, size_t astep, double* W, double* U, size_t ustep, double* Vt, size_t vstep, int m, int n, int flags); +CV_EXPORTS int QR32f(float* A, size_t astep, int m, int n, int k, float* b, size_t bstep, float* hFactors); +CV_EXPORTS int QR64f(double* A, size_t astep, int m, int n, int k, double* b, size_t bstep, double* hFactors); + +CV_EXPORTS void gemm32f(const float* src1, size_t src1_step, const float* src2, size_t src2_step, + float alpha, const float* src3, size_t src3_step, float beta, float* dst, size_t dst_step, + int m_a, int n_a, int n_d, int flags); +CV_EXPORTS void gemm64f(const double* src1, size_t src1_step, const double* src2, size_t src2_step, + double alpha, const double* src3, size_t src3_step, double beta, double* dst, size_t dst_step, + int m_a, int n_a, int n_d, int flags); +CV_EXPORTS void gemm32fc(const float* src1, size_t src1_step, const float* src2, size_t src2_step, + float alpha, const float* src3, size_t src3_step, float beta, float* dst, size_t dst_step, + int m_a, int n_a, int n_d, int flags); +CV_EXPORTS void gemm64fc(const double* src1, size_t src1_step, const double* src2, size_t src2_step, + double alpha, const double* src3, size_t src3_step, double beta, double* dst, size_t dst_step, + int m_a, int n_a, int n_d, int flags); + +CV_EXPORTS int normL1_(const uchar* a, const uchar* b, int n); +CV_EXPORTS float normL1_(const float* a, const float* b, int n); +CV_EXPORTS float normL2Sqr_(const float* a, const float* b, int n); + +CV_EXPORTS void exp32f(const float* src, float* dst, int n); +CV_EXPORTS void exp64f(const double* src, double* dst, int n); +CV_EXPORTS void log32f(const float* src, float* dst, int n); +CV_EXPORTS void log64f(const double* src, double* dst, int n); + +CV_EXPORTS void fastAtan32f(const float* y, const float* x, float* dst, int n, bool angleInDegrees); +CV_EXPORTS void fastAtan64f(const double* y, const double* x, double* dst, int n, bool angleInDegrees); +CV_EXPORTS void magnitude32f(const float* x, const float* y, float* dst, int n); +CV_EXPORTS void magnitude64f(const double* x, const double* y, double* dst, int n); +CV_EXPORTS void sqrt32f(const float* src, float* dst, int len); +CV_EXPORTS void sqrt64f(const double* src, double* dst, int len); +CV_EXPORTS void invSqrt32f(const float* src, float* dst, int len); +CV_EXPORTS void invSqrt64f(const double* src, double* dst, int len); + +CV_EXPORTS void split8u(const uchar* src, uchar** dst, int len, int cn ); +CV_EXPORTS void split16u(const ushort* src, ushort** dst, int len, int cn ); +CV_EXPORTS void split32s(const int* src, int** dst, int len, int cn ); +CV_EXPORTS void split64s(const int64* src, int64** dst, int len, int cn ); + +CV_EXPORTS void merge8u(const uchar** src, uchar* dst, int len, int cn ); +CV_EXPORTS void merge16u(const ushort** src, ushort* dst, int len, int cn ); +CV_EXPORTS void merge32s(const int** src, int* dst, int len, int cn ); +CV_EXPORTS void merge64s(const int64** src, int64* dst, int len, int cn ); + +CV_EXPORTS void add8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* ); +CV_EXPORTS void add8s( const schar* src1, size_t step1, const schar* src2, size_t step2, schar* dst, size_t step, int width, int height, void* ); +CV_EXPORTS void add16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, ushort* dst, size_t step, int width, int height, void* ); +CV_EXPORTS void add16s( const short* src1, size_t step1, const short* src2, size_t step2, short* dst, size_t step, int width, int height, void* ); +CV_EXPORTS void add32s( const int* src1, size_t step1, const int* src2, size_t step2, int* dst, size_t step, int width, int height, void* ); +CV_EXPORTS void add32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* ); +CV_EXPORTS void add64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* ); + +CV_EXPORTS void sub8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* ); +CV_EXPORTS void sub8s( const schar* src1, size_t step1, const schar* src2, size_t step2, schar* dst, size_t step, int width, int height, void* ); +CV_EXPORTS void sub16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, ushort* dst, size_t step, int width, int height, void* ); +CV_EXPORTS void sub16s( const short* src1, size_t step1, const short* src2, size_t step2, short* dst, size_t step, int width, int height, void* ); +CV_EXPORTS void sub32s( const int* src1, size_t step1, const int* src2, size_t step2, int* dst, size_t step, int width, int height, void* ); +CV_EXPORTS void sub32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* ); +CV_EXPORTS void sub64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* ); + +CV_EXPORTS void max8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* ); +CV_EXPORTS void max8s( const schar* src1, size_t step1, const schar* src2, size_t step2, schar* dst, size_t step, int width, int height, void* ); +CV_EXPORTS void max16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, ushort* dst, size_t step, int width, int height, void* ); +CV_EXPORTS void max16s( const short* src1, size_t step1, const short* src2, size_t step2, short* dst, size_t step, int width, int height, void* ); +CV_EXPORTS void max32s( const int* src1, size_t step1, const int* src2, size_t step2, int* dst, size_t step, int width, int height, void* ); +CV_EXPORTS void max32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* ); +CV_EXPORTS void max64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* ); + +CV_EXPORTS void min8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* ); +CV_EXPORTS void min8s( const schar* src1, size_t step1, const schar* src2, size_t step2, schar* dst, size_t step, int width, int height, void* ); +CV_EXPORTS void min16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, ushort* dst, size_t step, int width, int height, void* ); +CV_EXPORTS void min16s( const short* src1, size_t step1, const short* src2, size_t step2, short* dst, size_t step, int width, int height, void* ); +CV_EXPORTS void min32s( const int* src1, size_t step1, const int* src2, size_t step2, int* dst, size_t step, int width, int height, void* ); +CV_EXPORTS void min32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* ); +CV_EXPORTS void min64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* ); + +CV_EXPORTS void absdiff8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* ); +CV_EXPORTS void absdiff8s( const schar* src1, size_t step1, const schar* src2, size_t step2, schar* dst, size_t step, int width, int height, void* ); +CV_EXPORTS void absdiff16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, ushort* dst, size_t step, int width, int height, void* ); +CV_EXPORTS void absdiff16s( const short* src1, size_t step1, const short* src2, size_t step2, short* dst, size_t step, int width, int height, void* ); +CV_EXPORTS void absdiff32s( const int* src1, size_t step1, const int* src2, size_t step2, int* dst, size_t step, int width, int height, void* ); +CV_EXPORTS void absdiff32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* ); +CV_EXPORTS void absdiff64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* ); + +CV_EXPORTS void and8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* ); +CV_EXPORTS void or8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* ); +CV_EXPORTS void xor8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* ); +CV_EXPORTS void not8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* ); + +CV_EXPORTS void cmp8u(const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* _cmpop); +CV_EXPORTS void cmp8s(const schar* src1, size_t step1, const schar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* _cmpop); +CV_EXPORTS void cmp16u(const ushort* src1, size_t step1, const ushort* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* _cmpop); +CV_EXPORTS void cmp16s(const short* src1, size_t step1, const short* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* _cmpop); +CV_EXPORTS void cmp32s(const int* src1, size_t step1, const int* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* _cmpop); +CV_EXPORTS void cmp32f(const float* src1, size_t step1, const float* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* _cmpop); +CV_EXPORTS void cmp64f(const double* src1, size_t step1, const double* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* _cmpop); + +CV_EXPORTS void mul8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* scale); +CV_EXPORTS void mul8s( const schar* src1, size_t step1, const schar* src2, size_t step2, schar* dst, size_t step, int width, int height, void* scale); +CV_EXPORTS void mul16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, ushort* dst, size_t step, int width, int height, void* scale); +CV_EXPORTS void mul16s( const short* src1, size_t step1, const short* src2, size_t step2, short* dst, size_t step, int width, int height, void* scale); +CV_EXPORTS void mul32s( const int* src1, size_t step1, const int* src2, size_t step2, int* dst, size_t step, int width, int height, void* scale); +CV_EXPORTS void mul32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* scale); +CV_EXPORTS void mul64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* scale); + +CV_EXPORTS void div8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* scale); +CV_EXPORTS void div8s( const schar* src1, size_t step1, const schar* src2, size_t step2, schar* dst, size_t step, int width, int height, void* scale); +CV_EXPORTS void div16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, ushort* dst, size_t step, int width, int height, void* scale); +CV_EXPORTS void div16s( const short* src1, size_t step1, const short* src2, size_t step2, short* dst, size_t step, int width, int height, void* scale); +CV_EXPORTS void div32s( const int* src1, size_t step1, const int* src2, size_t step2, int* dst, size_t step, int width, int height, void* scale); +CV_EXPORTS void div32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* scale); +CV_EXPORTS void div64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* scale); + +CV_EXPORTS void recip8u( const uchar *, size_t, const uchar * src2, size_t step2, uchar* dst, size_t step, int width, int height, void* scale); +CV_EXPORTS void recip8s( const schar *, size_t, const schar * src2, size_t step2, schar* dst, size_t step, int width, int height, void* scale); +CV_EXPORTS void recip16u( const ushort *, size_t, const ushort * src2, size_t step2, ushort* dst, size_t step, int width, int height, void* scale); +CV_EXPORTS void recip16s( const short *, size_t, const short * src2, size_t step2, short* dst, size_t step, int width, int height, void* scale); +CV_EXPORTS void recip32s( const int *, size_t, const int * src2, size_t step2, int* dst, size_t step, int width, int height, void* scale); +CV_EXPORTS void recip32f( const float *, size_t, const float * src2, size_t step2, float* dst, size_t step, int width, int height, void* scale); +CV_EXPORTS void recip64f( const double *, size_t, const double * src2, size_t step2, double* dst, size_t step, int width, int height, void* scale); + +CV_EXPORTS void addWeighted8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* _scalars ); +CV_EXPORTS void addWeighted8s( const schar* src1, size_t step1, const schar* src2, size_t step2, schar* dst, size_t step, int width, int height, void* scalars ); +CV_EXPORTS void addWeighted16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, ushort* dst, size_t step, int width, int height, void* scalars ); +CV_EXPORTS void addWeighted16s( const short* src1, size_t step1, const short* src2, size_t step2, short* dst, size_t step, int width, int height, void* scalars ); +CV_EXPORTS void addWeighted32s( const int* src1, size_t step1, const int* src2, size_t step2, int* dst, size_t step, int width, int height, void* scalars ); +CV_EXPORTS void addWeighted32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* scalars ); +CV_EXPORTS void addWeighted64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* scalars ); + +CV_EXPORTS void cvt16f32f( const float16_t* src, float* dst, int len ); +CV_EXPORTS void cvt32f16f( const float* src, float16_t* dst, int len ); + +CV_EXPORTS void addRNGBias32f( float* arr, const float* scaleBiasPairs, int len ); +CV_EXPORTS void addRNGBias64f( double* arr, const double* scaleBiasPairs, int len ); + +struct CV_EXPORTS DFT1D +{ + static Ptr create(int len, int count, int depth, int flags, bool * useBuffer = 0); + virtual void apply(const uchar *src, uchar *dst) = 0; + virtual ~DFT1D() {} +}; + +struct CV_EXPORTS DFT2D +{ + static Ptr create(int width, int height, int depth, + int src_channels, int dst_channels, + int flags, int nonzero_rows = 0); + virtual void apply(const uchar *src_data, size_t src_step, uchar *dst_data, size_t dst_step) = 0; + virtual ~DFT2D() {} +}; + +struct CV_EXPORTS DCT2D +{ + static Ptr create(int width, int height, int depth, int flags); + virtual void apply(const uchar *src_data, size_t src_step, uchar *dst_data, size_t dst_step) = 0; + virtual ~DCT2D() {} +}; + +//! @} core_hal + +//============================================================================= +// for binary compatibility with 3.0 + +//! @cond IGNORED + +CV_EXPORTS int LU(float* A, size_t astep, int m, float* b, size_t bstep, int n); +CV_EXPORTS int LU(double* A, size_t astep, int m, double* b, size_t bstep, int n); +CV_EXPORTS bool Cholesky(float* A, size_t astep, int m, float* b, size_t bstep, int n); +CV_EXPORTS bool Cholesky(double* A, size_t astep, int m, double* b, size_t bstep, int n); + +CV_EXPORTS void exp(const float* src, float* dst, int n); +CV_EXPORTS void exp(const double* src, double* dst, int n); +CV_EXPORTS void log(const float* src, float* dst, int n); +CV_EXPORTS void log(const double* src, double* dst, int n); + +CV_EXPORTS void fastAtan2(const float* y, const float* x, float* dst, int n, bool angleInDegrees); +CV_EXPORTS void magnitude(const float* x, const float* y, float* dst, int n); +CV_EXPORTS void magnitude(const double* x, const double* y, double* dst, int n); +CV_EXPORTS void sqrt(const float* src, float* dst, int len); +CV_EXPORTS void sqrt(const double* src, double* dst, int len); +CV_EXPORTS void invSqrt(const float* src, float* dst, int len); +CV_EXPORTS void invSqrt(const double* src, double* dst, int len); + +//! @endcond + +}} //cv::hal + +#endif //OPENCV_HAL_HPP diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/hal/interface.h b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/hal/interface.h new file mode 100644 index 0000000..6f0a83d --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/hal/interface.h @@ -0,0 +1,190 @@ +#ifndef OPENCV_CORE_HAL_INTERFACE_H +#define OPENCV_CORE_HAL_INTERFACE_H + +//! @addtogroup core_hal_interface +//! @{ + +//! @name Return codes +//! @{ +#define CV_HAL_ERROR_OK 0 +#define CV_HAL_ERROR_NOT_IMPLEMENTED 1 +#define CV_HAL_ERROR_UNKNOWN -1 +//! @} + +#ifdef __cplusplus +#include +#else +#include +#include +#endif + +//! @name Data types +//! primitive types +//! - schar - signed 1 byte integer +//! - uchar - unsigned 1 byte integer +//! - short - signed 2 byte integer +//! - ushort - unsigned 2 byte integer +//! - int - signed 4 byte integer +//! - uint - unsigned 4 byte integer +//! - int64 - signed 8 byte integer +//! - uint64 - unsigned 8 byte integer +//! @{ +#if !defined _MSC_VER && !defined __BORLANDC__ +# if defined __cplusplus && __cplusplus >= 201103L && !defined __APPLE__ +# include +# ifdef __NEWLIB__ + typedef unsigned int uint; +# else + typedef std::uint32_t uint; +# endif +# else +# include + typedef uint32_t uint; +# endif +#else + typedef unsigned uint; +#endif + +typedef signed char schar; + +#ifndef __IPL_H__ + typedef unsigned char uchar; + typedef unsigned short ushort; +#endif + +#if defined _MSC_VER || defined __BORLANDC__ + typedef __int64 int64; + typedef unsigned __int64 uint64; +# define CV_BIG_INT(n) n##I64 +# define CV_BIG_UINT(n) n##UI64 +#else + typedef int64_t int64; + typedef uint64_t uint64; +# define CV_BIG_INT(n) n##LL +# define CV_BIG_UINT(n) n##ULL +#endif + +#define CV_USRTYPE1 (void)"CV_USRTYPE1 support has been dropped in OpenCV 4.0" + +#define CV_CN_MAX 512 +#define CV_CN_SHIFT 3 +#define CV_DEPTH_MAX (1 << CV_CN_SHIFT) + +#define CV_8U 0 +#define CV_8S 1 +#define CV_16U 2 +#define CV_16S 3 +#define CV_32S 4 +#define CV_32F 5 +#define CV_64F 6 +#define CV_16F 7 + +#define CV_MAT_DEPTH_MASK (CV_DEPTH_MAX - 1) +#define CV_MAT_DEPTH(flags) ((flags) & CV_MAT_DEPTH_MASK) + +#define CV_MAKETYPE(depth,cn) (CV_MAT_DEPTH(depth) + (((cn)-1) << CV_CN_SHIFT)) +#define CV_MAKE_TYPE CV_MAKETYPE + +#define CV_8UC1 CV_MAKETYPE(CV_8U,1) +#define CV_8UC2 CV_MAKETYPE(CV_8U,2) +#define CV_8UC3 CV_MAKETYPE(CV_8U,3) +#define CV_8UC4 CV_MAKETYPE(CV_8U,4) +#define CV_8UC(n) CV_MAKETYPE(CV_8U,(n)) + +#define CV_8SC1 CV_MAKETYPE(CV_8S,1) +#define CV_8SC2 CV_MAKETYPE(CV_8S,2) +#define CV_8SC3 CV_MAKETYPE(CV_8S,3) +#define CV_8SC4 CV_MAKETYPE(CV_8S,4) +#define CV_8SC(n) CV_MAKETYPE(CV_8S,(n)) + +#define CV_16UC1 CV_MAKETYPE(CV_16U,1) +#define CV_16UC2 CV_MAKETYPE(CV_16U,2) +#define CV_16UC3 CV_MAKETYPE(CV_16U,3) +#define CV_16UC4 CV_MAKETYPE(CV_16U,4) +#define CV_16UC(n) CV_MAKETYPE(CV_16U,(n)) + +#define CV_16SC1 CV_MAKETYPE(CV_16S,1) +#define CV_16SC2 CV_MAKETYPE(CV_16S,2) +#define CV_16SC3 CV_MAKETYPE(CV_16S,3) +#define CV_16SC4 CV_MAKETYPE(CV_16S,4) +#define CV_16SC(n) CV_MAKETYPE(CV_16S,(n)) + +#define CV_32SC1 CV_MAKETYPE(CV_32S,1) +#define CV_32SC2 CV_MAKETYPE(CV_32S,2) +#define CV_32SC3 CV_MAKETYPE(CV_32S,3) +#define CV_32SC4 CV_MAKETYPE(CV_32S,4) +#define CV_32SC(n) CV_MAKETYPE(CV_32S,(n)) + +#define CV_32FC1 CV_MAKETYPE(CV_32F,1) +#define CV_32FC2 CV_MAKETYPE(CV_32F,2) +#define CV_32FC3 CV_MAKETYPE(CV_32F,3) +#define CV_32FC4 CV_MAKETYPE(CV_32F,4) +#define CV_32FC(n) CV_MAKETYPE(CV_32F,(n)) + +#define CV_64FC1 CV_MAKETYPE(CV_64F,1) +#define CV_64FC2 CV_MAKETYPE(CV_64F,2) +#define CV_64FC3 CV_MAKETYPE(CV_64F,3) +#define CV_64FC4 CV_MAKETYPE(CV_64F,4) +#define CV_64FC(n) CV_MAKETYPE(CV_64F,(n)) + +#define CV_16FC1 CV_MAKETYPE(CV_16F,1) +#define CV_16FC2 CV_MAKETYPE(CV_16F,2) +#define CV_16FC3 CV_MAKETYPE(CV_16F,3) +#define CV_16FC4 CV_MAKETYPE(CV_16F,4) +#define CV_16FC(n) CV_MAKETYPE(CV_16F,(n)) +//! @} + +//! @name Comparison operation +//! @sa cv::CmpTypes +//! @{ +#define CV_HAL_CMP_EQ 0 +#define CV_HAL_CMP_GT 1 +#define CV_HAL_CMP_GE 2 +#define CV_HAL_CMP_LT 3 +#define CV_HAL_CMP_LE 4 +#define CV_HAL_CMP_NE 5 +//! @} + +//! @name Border processing modes +//! @sa cv::BorderTypes +//! @{ +#define CV_HAL_BORDER_CONSTANT 0 +#define CV_HAL_BORDER_REPLICATE 1 +#define CV_HAL_BORDER_REFLECT 2 +#define CV_HAL_BORDER_WRAP 3 +#define CV_HAL_BORDER_REFLECT_101 4 +#define CV_HAL_BORDER_TRANSPARENT 5 +#define CV_HAL_BORDER_ISOLATED 16 +//! @} + +//! @name DFT flags +//! @{ +#define CV_HAL_DFT_INVERSE 1 +#define CV_HAL_DFT_SCALE 2 +#define CV_HAL_DFT_ROWS 4 +#define CV_HAL_DFT_COMPLEX_OUTPUT 16 +#define CV_HAL_DFT_REAL_OUTPUT 32 +#define CV_HAL_DFT_TWO_STAGE 64 +#define CV_HAL_DFT_STAGE_COLS 128 +#define CV_HAL_DFT_IS_CONTINUOUS 512 +#define CV_HAL_DFT_IS_INPLACE 1024 +//! @} + +//! @name SVD flags +//! @{ +#define CV_HAL_SVD_NO_UV 1 +#define CV_HAL_SVD_SHORT_UV 2 +#define CV_HAL_SVD_MODIFY_A 4 +#define CV_HAL_SVD_FULL_UV 8 +//! @} + +//! @name Gemm flags +//! @{ +#define CV_HAL_GEMM_1_T 1 +#define CV_HAL_GEMM_2_T 2 +#define CV_HAL_GEMM_3_T 4 +//! @} + +//! @} + +#endif diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/hal/intrin.hpp b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/hal/intrin.hpp new file mode 100644 index 0000000..753cd21 --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/hal/intrin.hpp @@ -0,0 +1,524 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Copyright (C) 2013, OpenCV Foundation, all rights reserved. +// Copyright (C) 2015, Itseez Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef OPENCV_HAL_INTRIN_HPP +#define OPENCV_HAL_INTRIN_HPP + +#include +#include +#include +#include "opencv2/core/cvdef.h" + +#define OPENCV_HAL_ADD(a, b) ((a) + (b)) +#define OPENCV_HAL_AND(a, b) ((a) & (b)) +#define OPENCV_HAL_NOP(a) (a) +#define OPENCV_HAL_1ST(a, b) (a) + +namespace { +inline unsigned int trailingZeros32(unsigned int value) { +#if defined(_MSC_VER) +#if (_MSC_VER < 1700) || defined(_M_ARM) || defined(_M_ARM64) + unsigned long index = 0; + _BitScanForward(&index, value); + return (unsigned int)index; +#elif defined(__clang__) + // clang-cl doesn't export _tzcnt_u32 for non BMI systems + return value ? __builtin_ctz(value) : 32; +#else + return _tzcnt_u32(value); +#endif +#elif defined(__GNUC__) || defined(__GNUG__) + return __builtin_ctz(value); +#elif defined(__ICC) || defined(__INTEL_COMPILER) + return _bit_scan_forward(value); +#elif defined(__clang__) + return llvm.cttz.i32(value, true); +#else + static const int MultiplyDeBruijnBitPosition[32] = { + 0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8, + 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9 }; + return MultiplyDeBruijnBitPosition[((uint32_t)((value & -value) * 0x077CB531U)) >> 27]; +#endif +} +} + +// unlike HAL API, which is in cv::hal, +// we put intrinsics into cv namespace to make its +// access from within opencv code more accessible +namespace cv { + +namespace hal { + +enum StoreMode +{ + STORE_UNALIGNED = 0, + STORE_ALIGNED = 1, + STORE_ALIGNED_NOCACHE = 2 +}; + +} + +// TODO FIXIT: Don't use "God" traits. Split on separate cases. +template struct V_TypeTraits +{ +}; + +#define CV_INTRIN_DEF_TYPE_TRAITS(type, int_type_, uint_type_, abs_type_, w_type_, q_type_, sum_type_, nlanes128_) \ + template<> struct V_TypeTraits \ + { \ + typedef type value_type; \ + typedef int_type_ int_type; \ + typedef abs_type_ abs_type; \ + typedef uint_type_ uint_type; \ + typedef w_type_ w_type; \ + typedef q_type_ q_type; \ + typedef sum_type_ sum_type; \ + enum { nlanes128 = nlanes128_ }; \ + \ + static inline int_type reinterpret_int(type x) \ + { \ + union { type l; int_type i; } v; \ + v.l = x; \ + return v.i; \ + } \ + \ + static inline type reinterpret_from_int(int_type x) \ + { \ + union { type l; int_type i; } v; \ + v.i = x; \ + return v.l; \ + } \ + } + +#define CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(type, int_type_, uint_type_, abs_type_, w_type_, sum_type_, nlanes128_) \ + template<> struct V_TypeTraits \ + { \ + typedef type value_type; \ + typedef int_type_ int_type; \ + typedef abs_type_ abs_type; \ + typedef uint_type_ uint_type; \ + typedef w_type_ w_type; \ + typedef sum_type_ sum_type; \ + enum { nlanes128 = nlanes128_ }; \ + \ + static inline int_type reinterpret_int(type x) \ + { \ + union { type l; int_type i; } v; \ + v.l = x; \ + return v.i; \ + } \ + \ + static inline type reinterpret_from_int(int_type x) \ + { \ + union { type l; int_type i; } v; \ + v.i = x; \ + return v.l; \ + } \ + } + +CV_INTRIN_DEF_TYPE_TRAITS(uchar, schar, uchar, uchar, ushort, unsigned, unsigned, 16); +CV_INTRIN_DEF_TYPE_TRAITS(schar, schar, uchar, uchar, short, int, int, 16); +CV_INTRIN_DEF_TYPE_TRAITS(ushort, short, ushort, ushort, unsigned, uint64, unsigned, 8); +CV_INTRIN_DEF_TYPE_TRAITS(short, short, ushort, ushort, int, int64, int, 8); +CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(unsigned, int, unsigned, unsigned, uint64, unsigned, 4); +CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(int, int, unsigned, unsigned, int64, int, 4); +CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(float, int, unsigned, float, double, float, 4); +CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(uint64, int64, uint64, uint64, void, uint64, 2); +CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(int64, int64, uint64, uint64, void, int64, 2); +CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(double, int64, uint64, double, void, double, 2); + +#ifndef CV_DOXYGEN + +#ifndef CV_CPU_OPTIMIZATION_HAL_NAMESPACE +#ifdef CV_FORCE_SIMD128_CPP + #define CV_CPU_OPTIMIZATION_HAL_NAMESPACE hal_EMULATOR_CPP + #define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN namespace hal_EMULATOR_CPP { + #define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END } +#elif defined(CV_CPU_DISPATCH_MODE) + #define CV_CPU_OPTIMIZATION_HAL_NAMESPACE __CV_CAT(hal_, CV_CPU_DISPATCH_MODE) + #define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN namespace __CV_CAT(hal_, CV_CPU_DISPATCH_MODE) { + #define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END } +#else + #define CV_CPU_OPTIMIZATION_HAL_NAMESPACE hal_baseline + #define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN namespace hal_baseline { + #define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END } +#endif +#endif // CV_CPU_OPTIMIZATION_HAL_NAMESPACE + +CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN +CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END +using namespace CV_CPU_OPTIMIZATION_HAL_NAMESPACE; +#endif +} + +#ifdef CV_DOXYGEN +# undef CV_AVX2 +# undef CV_SSE2 +# undef CV_NEON +# undef CV_VSX +# undef CV_FP16 +# undef CV_MSA +# undef CV_RVV +#endif + +#if (CV_SSE2 || CV_NEON || CV_VSX || CV_MSA || CV_WASM_SIMD || CV_RVV) && !defined(CV_FORCE_SIMD128_CPP) +#define CV__SIMD_FORWARD 128 +#include "opencv2/core/hal/intrin_forward.hpp" +#endif + +#if CV_SSE2 && !defined(CV_FORCE_SIMD128_CPP) + +#include "opencv2/core/hal/intrin_sse_em.hpp" +#include "opencv2/core/hal/intrin_sse.hpp" + +#elif CV_NEON && !defined(CV_FORCE_SIMD128_CPP) + +#include "opencv2/core/hal/intrin_neon.hpp" + +#elif CV_VSX && !defined(CV_FORCE_SIMD128_CPP) + +#include "opencv2/core/hal/intrin_vsx.hpp" + +#elif CV_MSA && !defined(CV_FORCE_SIMD128_CPP) + +#include "opencv2/core/hal/intrin_msa.hpp" + +#elif CV_WASM_SIMD && !defined(CV_FORCE_SIMD128_CPP) +#include "opencv2/core/hal/intrin_wasm.hpp" + +#elif CV_RVV && !defined(CV_FORCE_SIMD128_CPP) +#include "opencv2/core/hal/intrin_rvv.hpp" + +#else + +#include "opencv2/core/hal/intrin_cpp.hpp" + +#endif + +// AVX2 can be used together with SSE2, so +// we define those two sets of intrinsics at once. +// Most of the intrinsics do not conflict (the proper overloaded variant is +// resolved by the argument types, e.g. v_float32x4 ~ SSE2, v_float32x8 ~ AVX2), +// but some of AVX2 intrinsics get v256_ prefix instead of v_, e.g. v256_load() vs v_load(). +// Correspondingly, the wide intrinsics (which are mapped to the "widest" +// available instruction set) will get vx_ prefix +// (and will be mapped to v256_ counterparts) (e.g. vx_load() => v256_load()) +#if CV_AVX2 + +#define CV__SIMD_FORWARD 256 +#include "opencv2/core/hal/intrin_forward.hpp" +#include "opencv2/core/hal/intrin_avx.hpp" + +#endif + +// AVX512 can be used together with SSE2 and AVX2, so +// we define those sets of intrinsics at once. +// For some of AVX512 intrinsics get v512_ prefix instead of v_, e.g. v512_load() vs v_load(). +// Wide intrinsics will be mapped to v512_ counterparts in this case(e.g. vx_load() => v512_load()) +#if CV_AVX512_SKX + +#define CV__SIMD_FORWARD 512 +#include "opencv2/core/hal/intrin_forward.hpp" +#include "opencv2/core/hal/intrin_avx512.hpp" + +#endif + +//! @cond IGNORED + +namespace cv { + +#ifndef CV_DOXYGEN +CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN +#endif + +#ifndef CV_SIMD128 +#define CV_SIMD128 0 +#endif + +#ifndef CV_SIMD128_CPP +#define CV_SIMD128_CPP 0 +#endif + +#ifndef CV_SIMD128_64F +#define CV_SIMD128_64F 0 +#endif + +#ifndef CV_SIMD256 +#define CV_SIMD256 0 +#endif + +#ifndef CV_SIMD256_64F +#define CV_SIMD256_64F 0 +#endif + +#ifndef CV_SIMD512 +#define CV_SIMD512 0 +#endif + +#ifndef CV_SIMD512_64F +#define CV_SIMD512_64F 0 +#endif + +#ifndef CV_SIMD128_FP16 +#define CV_SIMD128_FP16 0 +#endif + +#ifndef CV_SIMD256_FP16 +#define CV_SIMD256_FP16 0 +#endif + +#ifndef CV_SIMD512_FP16 +#define CV_SIMD512_FP16 0 +#endif + +//================================================================================================== + +#define CV_INTRIN_DEFINE_WIDE_INTRIN(typ, vtyp, short_typ, prefix, loadsfx) \ + inline vtyp vx_setall_##short_typ(typ v) { return prefix##_setall_##short_typ(v); } \ + inline vtyp vx_setzero_##short_typ() { return prefix##_setzero_##short_typ(); } \ + inline vtyp vx_##loadsfx(const typ* ptr) { return prefix##_##loadsfx(ptr); } \ + inline vtyp vx_##loadsfx##_aligned(const typ* ptr) { return prefix##_##loadsfx##_aligned(ptr); } \ + inline vtyp vx_##loadsfx##_low(const typ* ptr) { return prefix##_##loadsfx##_low(ptr); } \ + inline vtyp vx_##loadsfx##_halves(const typ* ptr0, const typ* ptr1) { return prefix##_##loadsfx##_halves(ptr0, ptr1); } \ + inline void vx_store(typ* ptr, const vtyp& v) { return v_store(ptr, v); } \ + inline void vx_store_aligned(typ* ptr, const vtyp& v) { return v_store_aligned(ptr, v); } \ + inline vtyp vx_lut(const typ* ptr, const int* idx) { return prefix##_lut(ptr, idx); } \ + inline vtyp vx_lut_pairs(const typ* ptr, const int* idx) { return prefix##_lut_pairs(ptr, idx); } + +#define CV_INTRIN_DEFINE_WIDE_LUT_QUAD(typ, vtyp, prefix) \ + inline vtyp vx_lut_quads(const typ* ptr, const int* idx) { return prefix##_lut_quads(ptr, idx); } + +#define CV_INTRIN_DEFINE_WIDE_LOAD_EXPAND(typ, wtyp, prefix) \ + inline wtyp vx_load_expand(const typ* ptr) { return prefix##_load_expand(ptr); } + +#define CV_INTRIN_DEFINE_WIDE_LOAD_EXPAND_Q(typ, qtyp, prefix) \ + inline qtyp vx_load_expand_q(const typ* ptr) { return prefix##_load_expand_q(ptr); } + +#define CV_INTRIN_DEFINE_WIDE_INTRIN_WITH_EXPAND(typ, vtyp, short_typ, wtyp, qtyp, prefix, loadsfx) \ + CV_INTRIN_DEFINE_WIDE_INTRIN(typ, vtyp, short_typ, prefix, loadsfx) \ + CV_INTRIN_DEFINE_WIDE_LUT_QUAD(typ, vtyp, prefix) \ + CV_INTRIN_DEFINE_WIDE_LOAD_EXPAND(typ, wtyp, prefix) \ + CV_INTRIN_DEFINE_WIDE_LOAD_EXPAND_Q(typ, qtyp, prefix) + +#define CV_INTRIN_DEFINE_WIDE_INTRIN_ALL_TYPES(prefix) \ + CV_INTRIN_DEFINE_WIDE_INTRIN_WITH_EXPAND(uchar, v_uint8, u8, v_uint16, v_uint32, prefix, load) \ + CV_INTRIN_DEFINE_WIDE_INTRIN_WITH_EXPAND(schar, v_int8, s8, v_int16, v_int32, prefix, load) \ + CV_INTRIN_DEFINE_WIDE_INTRIN(ushort, v_uint16, u16, prefix, load) \ + CV_INTRIN_DEFINE_WIDE_LUT_QUAD(ushort, v_uint16, prefix) \ + CV_INTRIN_DEFINE_WIDE_LOAD_EXPAND(ushort, v_uint32, prefix) \ + CV_INTRIN_DEFINE_WIDE_INTRIN(short, v_int16, s16, prefix, load) \ + CV_INTRIN_DEFINE_WIDE_LUT_QUAD(short, v_int16, prefix) \ + CV_INTRIN_DEFINE_WIDE_LOAD_EXPAND(short, v_int32, prefix) \ + CV_INTRIN_DEFINE_WIDE_INTRIN(int, v_int32, s32, prefix, load) \ + CV_INTRIN_DEFINE_WIDE_LUT_QUAD(int, v_int32, prefix) \ + CV_INTRIN_DEFINE_WIDE_LOAD_EXPAND(int, v_int64, prefix) \ + CV_INTRIN_DEFINE_WIDE_INTRIN(unsigned, v_uint32, u32, prefix, load) \ + CV_INTRIN_DEFINE_WIDE_LUT_QUAD(unsigned, v_uint32, prefix) \ + CV_INTRIN_DEFINE_WIDE_LOAD_EXPAND(unsigned, v_uint64, prefix) \ + CV_INTRIN_DEFINE_WIDE_INTRIN(float, v_float32, f32, prefix, load) \ + CV_INTRIN_DEFINE_WIDE_LUT_QUAD(float, v_float32, prefix) \ + CV_INTRIN_DEFINE_WIDE_INTRIN(int64, v_int64, s64, prefix, load) \ + CV_INTRIN_DEFINE_WIDE_INTRIN(uint64, v_uint64, u64, prefix, load) \ + CV_INTRIN_DEFINE_WIDE_LOAD_EXPAND(float16_t, v_float32, prefix) + +template struct V_RegTraits +{ +}; + +#define CV_DEF_REG_TRAITS(prefix, _reg, lane_type, suffix, _u_reg, _w_reg, _q_reg, _int_reg, _round_reg) \ + template<> struct V_RegTraits<_reg> \ + { \ + typedef _reg reg; \ + typedef _u_reg u_reg; \ + typedef _w_reg w_reg; \ + typedef _q_reg q_reg; \ + typedef _int_reg int_reg; \ + typedef _round_reg round_reg; \ + } + +#if CV_SIMD128 || CV_SIMD128_CPP + CV_DEF_REG_TRAITS(v, v_uint8x16, uchar, u8, v_uint8x16, v_uint16x8, v_uint32x4, v_int8x16, void); + CV_DEF_REG_TRAITS(v, v_int8x16, schar, s8, v_uint8x16, v_int16x8, v_int32x4, v_int8x16, void); + CV_DEF_REG_TRAITS(v, v_uint16x8, ushort, u16, v_uint16x8, v_uint32x4, v_uint64x2, v_int16x8, void); + CV_DEF_REG_TRAITS(v, v_int16x8, short, s16, v_uint16x8, v_int32x4, v_int64x2, v_int16x8, void); + CV_DEF_REG_TRAITS(v, v_uint32x4, unsigned, u32, v_uint32x4, v_uint64x2, void, v_int32x4, void); + CV_DEF_REG_TRAITS(v, v_int32x4, int, s32, v_uint32x4, v_int64x2, void, v_int32x4, void); +#if CV_SIMD128_64F || CV_SIMD128_CPP + CV_DEF_REG_TRAITS(v, v_float32x4, float, f32, v_float32x4, v_float64x2, void, v_int32x4, v_int32x4); +#else + CV_DEF_REG_TRAITS(v, v_float32x4, float, f32, v_float32x4, void, void, v_int32x4, v_int32x4); +#endif + CV_DEF_REG_TRAITS(v, v_uint64x2, uint64, u64, v_uint64x2, void, void, v_int64x2, void); + CV_DEF_REG_TRAITS(v, v_int64x2, int64, s64, v_uint64x2, void, void, v_int64x2, void); +#if CV_SIMD128_64F + CV_DEF_REG_TRAITS(v, v_float64x2, double, f64, v_float64x2, void, void, v_int64x2, v_int32x4); +#endif +#endif + +#if CV_SIMD256 + CV_DEF_REG_TRAITS(v256, v_uint8x32, uchar, u8, v_uint8x32, v_uint16x16, v_uint32x8, v_int8x32, void); + CV_DEF_REG_TRAITS(v256, v_int8x32, schar, s8, v_uint8x32, v_int16x16, v_int32x8, v_int8x32, void); + CV_DEF_REG_TRAITS(v256, v_uint16x16, ushort, u16, v_uint16x16, v_uint32x8, v_uint64x4, v_int16x16, void); + CV_DEF_REG_TRAITS(v256, v_int16x16, short, s16, v_uint16x16, v_int32x8, v_int64x4, v_int16x16, void); + CV_DEF_REG_TRAITS(v256, v_uint32x8, unsigned, u32, v_uint32x8, v_uint64x4, void, v_int32x8, void); + CV_DEF_REG_TRAITS(v256, v_int32x8, int, s32, v_uint32x8, v_int64x4, void, v_int32x8, void); + CV_DEF_REG_TRAITS(v256, v_float32x8, float, f32, v_float32x8, v_float64x4, void, v_int32x8, v_int32x8); + CV_DEF_REG_TRAITS(v256, v_uint64x4, uint64, u64, v_uint64x4, void, void, v_int64x4, void); + CV_DEF_REG_TRAITS(v256, v_int64x4, int64, s64, v_uint64x4, void, void, v_int64x4, void); + CV_DEF_REG_TRAITS(v256, v_float64x4, double, f64, v_float64x4, void, void, v_int64x4, v_int32x8); +#endif + +#if CV_SIMD512 + CV_DEF_REG_TRAITS(v512, v_uint8x64, uchar, u8, v_uint8x64, v_uint16x32, v_uint32x16, v_int8x64, void); + CV_DEF_REG_TRAITS(v512, v_int8x64, schar, s8, v_uint8x64, v_int16x32, v_int32x16, v_int8x64, void); + CV_DEF_REG_TRAITS(v512, v_uint16x32, ushort, u16, v_uint16x32, v_uint32x16, v_uint64x8, v_int16x32, void); + CV_DEF_REG_TRAITS(v512, v_int16x32, short, s16, v_uint16x32, v_int32x16, v_int64x8, v_int16x32, void); + CV_DEF_REG_TRAITS(v512, v_uint32x16, unsigned, u32, v_uint32x16, v_uint64x8, void, v_int32x16, void); + CV_DEF_REG_TRAITS(v512, v_int32x16, int, s32, v_uint32x16, v_int64x8, void, v_int32x16, void); + CV_DEF_REG_TRAITS(v512, v_float32x16, float, f32, v_float32x16, v_float64x8, void, v_int32x16, v_int32x16); + CV_DEF_REG_TRAITS(v512, v_uint64x8, uint64, u64, v_uint64x8, void, void, v_int64x8, void); + CV_DEF_REG_TRAITS(v512, v_int64x8, int64, s64, v_uint64x8, void, void, v_int64x8, void); + CV_DEF_REG_TRAITS(v512, v_float64x8, double, f64, v_float64x8, void, void, v_int64x8, v_int32x16); +#endif + +#if CV_SIMD512 && (!defined(CV__SIMD_FORCE_WIDTH) || CV__SIMD_FORCE_WIDTH == 512) +#define CV__SIMD_NAMESPACE simd512 +namespace CV__SIMD_NAMESPACE { + #define CV_SIMD 1 + #define CV_SIMD_64F CV_SIMD512_64F + #define CV_SIMD_FP16 CV_SIMD512_FP16 + #define CV_SIMD_WIDTH 64 + typedef v_uint8x64 v_uint8; + typedef v_int8x64 v_int8; + typedef v_uint16x32 v_uint16; + typedef v_int16x32 v_int16; + typedef v_uint32x16 v_uint32; + typedef v_int32x16 v_int32; + typedef v_uint64x8 v_uint64; + typedef v_int64x8 v_int64; + typedef v_float32x16 v_float32; + CV_INTRIN_DEFINE_WIDE_INTRIN_ALL_TYPES(v512) +#if CV_SIMD512_64F + typedef v_float64x8 v_float64; + CV_INTRIN_DEFINE_WIDE_INTRIN(double, v_float64, f64, v512, load) +#endif + inline void vx_cleanup() { v512_cleanup(); } +} // namespace +using namespace CV__SIMD_NAMESPACE; +#elif CV_SIMD256 && (!defined(CV__SIMD_FORCE_WIDTH) || CV__SIMD_FORCE_WIDTH == 256) +#define CV__SIMD_NAMESPACE simd256 +namespace CV__SIMD_NAMESPACE { + #define CV_SIMD 1 + #define CV_SIMD_64F CV_SIMD256_64F + #define CV_SIMD_FP16 CV_SIMD256_FP16 + #define CV_SIMD_WIDTH 32 + typedef v_uint8x32 v_uint8; + typedef v_int8x32 v_int8; + typedef v_uint16x16 v_uint16; + typedef v_int16x16 v_int16; + typedef v_uint32x8 v_uint32; + typedef v_int32x8 v_int32; + typedef v_uint64x4 v_uint64; + typedef v_int64x4 v_int64; + typedef v_float32x8 v_float32; + CV_INTRIN_DEFINE_WIDE_INTRIN_ALL_TYPES(v256) + #if CV_SIMD256_64F + typedef v_float64x4 v_float64; + CV_INTRIN_DEFINE_WIDE_INTRIN(double, v_float64, f64, v256, load) + #endif + inline void vx_cleanup() { v256_cleanup(); } +} // namespace +using namespace CV__SIMD_NAMESPACE; +#elif (CV_SIMD128 || CV_SIMD128_CPP) && (!defined(CV__SIMD_FORCE_WIDTH) || CV__SIMD_FORCE_WIDTH == 128) +#if defined CV_SIMD128_CPP +#define CV__SIMD_NAMESPACE simd128_cpp +#else +#define CV__SIMD_NAMESPACE simd128 +#endif +namespace CV__SIMD_NAMESPACE { + #define CV_SIMD CV_SIMD128 + #define CV_SIMD_64F CV_SIMD128_64F + #define CV_SIMD_WIDTH 16 + typedef v_uint8x16 v_uint8; + typedef v_int8x16 v_int8; + typedef v_uint16x8 v_uint16; + typedef v_int16x8 v_int16; + typedef v_uint32x4 v_uint32; + typedef v_int32x4 v_int32; + typedef v_uint64x2 v_uint64; + typedef v_int64x2 v_int64; + typedef v_float32x4 v_float32; + CV_INTRIN_DEFINE_WIDE_INTRIN_ALL_TYPES(v) + #if CV_SIMD128_64F + typedef v_float64x2 v_float64; + CV_INTRIN_DEFINE_WIDE_INTRIN(double, v_float64, f64, v, load) + #endif + inline void vx_cleanup() { v_cleanup(); } +} // namespace +using namespace CV__SIMD_NAMESPACE; +#endif + +#ifndef CV_SIMD_64F +#define CV_SIMD_64F 0 +#endif + +#ifndef CV_SIMD_FP16 +#define CV_SIMD_FP16 0 //!< Defined to 1 on native support of operations with float16x8_t / float16x16_t (SIMD256) types +#endif + +#ifndef CV_SIMD +#define CV_SIMD 0 +#endif + +#include "simd_utils.impl.hpp" + +#ifndef CV_DOXYGEN +CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END +#endif + +} // cv:: + +//! @endcond + +#endif diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/hal/intrin_avx.hpp b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/hal/intrin_avx.hpp new file mode 100644 index 0000000..54e8927 --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/hal/intrin_avx.hpp @@ -0,0 +1,3165 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html + +#ifndef OPENCV_HAL_INTRIN_AVX_HPP +#define OPENCV_HAL_INTRIN_AVX_HPP + +#define CV_SIMD256 1 +#define CV_SIMD256_64F 1 +#define CV_SIMD256_FP16 0 // no native operations with FP16 type. Only load/store from float32x8 are available (if CV_FP16 == 1) + +namespace cv +{ + +//! @cond IGNORED + +CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN + +///////// Utils //////////// + +inline __m256i _v256_combine(const __m128i& lo, const __m128i& hi) +{ return _mm256_inserti128_si256(_mm256_castsi128_si256(lo), hi, 1); } + +inline __m256 _v256_combine(const __m128& lo, const __m128& hi) +{ return _mm256_insertf128_ps(_mm256_castps128_ps256(lo), hi, 1); } + +inline __m256d _v256_combine(const __m128d& lo, const __m128d& hi) +{ return _mm256_insertf128_pd(_mm256_castpd128_pd256(lo), hi, 1); } + +inline int _v_cvtsi256_si32(const __m256i& a) +{ return _mm_cvtsi128_si32(_mm256_castsi256_si128(a)); } + +inline __m256i _v256_shuffle_odd_64(const __m256i& v) +{ return _mm256_permute4x64_epi64(v, _MM_SHUFFLE(3, 1, 2, 0)); } + +inline __m256d _v256_shuffle_odd_64(const __m256d& v) +{ return _mm256_permute4x64_pd(v, _MM_SHUFFLE(3, 1, 2, 0)); } + +template +inline __m256i _v256_permute2x128(const __m256i& a, const __m256i& b) +{ return _mm256_permute2x128_si256(a, b, imm); } + +template +inline __m256 _v256_permute2x128(const __m256& a, const __m256& b) +{ return _mm256_permute2f128_ps(a, b, imm); } + +template +inline __m256d _v256_permute2x128(const __m256d& a, const __m256d& b) +{ return _mm256_permute2f128_pd(a, b, imm); } + +template +inline _Tpvec v256_permute2x128(const _Tpvec& a, const _Tpvec& b) +{ return _Tpvec(_v256_permute2x128(a.val, b.val)); } + +template +inline __m256i _v256_permute4x64(const __m256i& a) +{ return _mm256_permute4x64_epi64(a, imm); } + +template +inline __m256d _v256_permute4x64(const __m256d& a) +{ return _mm256_permute4x64_pd(a, imm); } + +template +inline _Tpvec v256_permute4x64(const _Tpvec& a) +{ return _Tpvec(_v256_permute4x64(a.val)); } + +inline __m128i _v256_extract_high(const __m256i& v) +{ return _mm256_extracti128_si256(v, 1); } + +inline __m128 _v256_extract_high(const __m256& v) +{ return _mm256_extractf128_ps(v, 1); } + +inline __m128d _v256_extract_high(const __m256d& v) +{ return _mm256_extractf128_pd(v, 1); } + +inline __m128i _v256_extract_low(const __m256i& v) +{ return _mm256_castsi256_si128(v); } + +inline __m128 _v256_extract_low(const __m256& v) +{ return _mm256_castps256_ps128(v); } + +inline __m128d _v256_extract_low(const __m256d& v) +{ return _mm256_castpd256_pd128(v); } + +inline __m256i _v256_packs_epu32(const __m256i& a, const __m256i& b) +{ + const __m256i m = _mm256_set1_epi32(65535); + __m256i am = _mm256_min_epu32(a, m); + __m256i bm = _mm256_min_epu32(b, m); + return _mm256_packus_epi32(am, bm); +} + +template +inline int _v256_extract_epi8(const __m256i& a) +{ +#if defined(CV__SIMD_HAVE_mm256_extract_epi8) || (CV_AVX2 && (!defined(_MSC_VER) || _MSC_VER >= 1910/*MSVS 2017*/)) + return _mm256_extract_epi8(a, i); +#else + __m128i b = _mm256_extractf128_si256(a, ((i) >> 4)); + return _mm_extract_epi8(b, i & 15); // SSE4.1 +#endif +} + +template +inline int _v256_extract_epi16(const __m256i& a) +{ +#if defined(CV__SIMD_HAVE_mm256_extract_epi8) || (CV_AVX2 && (!defined(_MSC_VER) || _MSC_VER >= 1910/*MSVS 2017*/)) + return _mm256_extract_epi16(a, i); +#else + __m128i b = _mm256_extractf128_si256(a, ((i) >> 3)); + return _mm_extract_epi16(b, i & 7); // SSE2 +#endif +} + +template +inline int _v256_extract_epi32(const __m256i& a) +{ +#if defined(CV__SIMD_HAVE_mm256_extract_epi8) || (CV_AVX2 && (!defined(_MSC_VER) || _MSC_VER >= 1910/*MSVS 2017*/)) + return _mm256_extract_epi32(a, i); +#else + __m128i b = _mm256_extractf128_si256(a, ((i) >> 2)); + return _mm_extract_epi32(b, i & 3); // SSE4.1 +#endif +} + +template +inline int64 _v256_extract_epi64(const __m256i& a) +{ +#if defined(CV__SIMD_HAVE_mm256_extract_epi8) || (CV_AVX2 && (!defined(_MSC_VER) || _MSC_VER >= 1910/*MSVS 2017*/)) + return _mm256_extract_epi64(a, i); +#else + __m128i b = _mm256_extractf128_si256(a, ((i) >> 1)); + return _mm_extract_epi64(b, i & 1); // SSE4.1 +#endif +} + +///////// Types //////////// + +struct v_uint8x32 +{ + typedef uchar lane_type; + enum { nlanes = 32 }; + __m256i val; + + explicit v_uint8x32(__m256i v) : val(v) {} + v_uint8x32(uchar v0, uchar v1, uchar v2, uchar v3, + uchar v4, uchar v5, uchar v6, uchar v7, + uchar v8, uchar v9, uchar v10, uchar v11, + uchar v12, uchar v13, uchar v14, uchar v15, + uchar v16, uchar v17, uchar v18, uchar v19, + uchar v20, uchar v21, uchar v22, uchar v23, + uchar v24, uchar v25, uchar v26, uchar v27, + uchar v28, uchar v29, uchar v30, uchar v31) + { + val = _mm256_setr_epi8((char)v0, (char)v1, (char)v2, (char)v3, + (char)v4, (char)v5, (char)v6 , (char)v7, (char)v8, (char)v9, + (char)v10, (char)v11, (char)v12, (char)v13, (char)v14, (char)v15, + (char)v16, (char)v17, (char)v18, (char)v19, (char)v20, (char)v21, + (char)v22, (char)v23, (char)v24, (char)v25, (char)v26, (char)v27, + (char)v28, (char)v29, (char)v30, (char)v31); + } + /* coverity[uninit_ctor]: suppress warning */ + v_uint8x32() {} + + uchar get0() const { return (uchar)_v_cvtsi256_si32(val); } +}; + +struct v_int8x32 +{ + typedef schar lane_type; + enum { nlanes = 32 }; + __m256i val; + + explicit v_int8x32(__m256i v) : val(v) {} + v_int8x32(schar v0, schar v1, schar v2, schar v3, + schar v4, schar v5, schar v6, schar v7, + schar v8, schar v9, schar v10, schar v11, + schar v12, schar v13, schar v14, schar v15, + schar v16, schar v17, schar v18, schar v19, + schar v20, schar v21, schar v22, schar v23, + schar v24, schar v25, schar v26, schar v27, + schar v28, schar v29, schar v30, schar v31) + { + val = _mm256_setr_epi8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, + v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, + v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31); + } + /* coverity[uninit_ctor]: suppress warning */ + v_int8x32() {} + + schar get0() const { return (schar)_v_cvtsi256_si32(val); } +}; + +struct v_uint16x16 +{ + typedef ushort lane_type; + enum { nlanes = 16 }; + __m256i val; + + explicit v_uint16x16(__m256i v) : val(v) {} + v_uint16x16(ushort v0, ushort v1, ushort v2, ushort v3, + ushort v4, ushort v5, ushort v6, ushort v7, + ushort v8, ushort v9, ushort v10, ushort v11, + ushort v12, ushort v13, ushort v14, ushort v15) + { + val = _mm256_setr_epi16((short)v0, (short)v1, (short)v2, (short)v3, + (short)v4, (short)v5, (short)v6, (short)v7, (short)v8, (short)v9, + (short)v10, (short)v11, (short)v12, (short)v13, (short)v14, (short)v15); + } + /* coverity[uninit_ctor]: suppress warning */ + v_uint16x16() {} + + ushort get0() const { return (ushort)_v_cvtsi256_si32(val); } +}; + +struct v_int16x16 +{ + typedef short lane_type; + enum { nlanes = 16 }; + __m256i val; + + explicit v_int16x16(__m256i v) : val(v) {} + v_int16x16(short v0, short v1, short v2, short v3, + short v4, short v5, short v6, short v7, + short v8, short v9, short v10, short v11, + short v12, short v13, short v14, short v15) + { + val = _mm256_setr_epi16(v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10, v11, v12, v13, v14, v15); + } + /* coverity[uninit_ctor]: suppress warning */ + v_int16x16() {} + + short get0() const { return (short)_v_cvtsi256_si32(val); } +}; + +struct v_uint32x8 +{ + typedef unsigned lane_type; + enum { nlanes = 8 }; + __m256i val; + + explicit v_uint32x8(__m256i v) : val(v) {} + v_uint32x8(unsigned v0, unsigned v1, unsigned v2, unsigned v3, + unsigned v4, unsigned v5, unsigned v6, unsigned v7) + { + val = _mm256_setr_epi32((unsigned)v0, (unsigned)v1, (unsigned)v2, + (unsigned)v3, (unsigned)v4, (unsigned)v5, (unsigned)v6, (unsigned)v7); + } + /* coverity[uninit_ctor]: suppress warning */ + v_uint32x8() {} + + unsigned get0() const { return (unsigned)_v_cvtsi256_si32(val); } +}; + +struct v_int32x8 +{ + typedef int lane_type; + enum { nlanes = 8 }; + __m256i val; + + explicit v_int32x8(__m256i v) : val(v) {} + v_int32x8(int v0, int v1, int v2, int v3, + int v4, int v5, int v6, int v7) + { + val = _mm256_setr_epi32(v0, v1, v2, v3, v4, v5, v6, v7); + } + /* coverity[uninit_ctor]: suppress warning */ + v_int32x8() {} + + int get0() const { return _v_cvtsi256_si32(val); } +}; + +struct v_float32x8 +{ + typedef float lane_type; + enum { nlanes = 8 }; + __m256 val; + + explicit v_float32x8(__m256 v) : val(v) {} + v_float32x8(float v0, float v1, float v2, float v3, + float v4, float v5, float v6, float v7) + { + val = _mm256_setr_ps(v0, v1, v2, v3, v4, v5, v6, v7); + } + /* coverity[uninit_ctor]: suppress warning */ + v_float32x8() {} + + float get0() const { return _mm_cvtss_f32(_mm256_castps256_ps128(val)); } +}; + +struct v_uint64x4 +{ + typedef uint64 lane_type; + enum { nlanes = 4 }; + __m256i val; + + explicit v_uint64x4(__m256i v) : val(v) {} + v_uint64x4(uint64 v0, uint64 v1, uint64 v2, uint64 v3) + { val = _mm256_setr_epi64x((int64)v0, (int64)v1, (int64)v2, (int64)v3); } + /* coverity[uninit_ctor]: suppress warning */ + v_uint64x4() {} + + uint64 get0() const + { + #if defined __x86_64__ || defined _M_X64 + return (uint64)_mm_cvtsi128_si64(_mm256_castsi256_si128(val)); + #else + int a = _mm_cvtsi128_si32(_mm256_castsi256_si128(val)); + int b = _mm_cvtsi128_si32(_mm256_castsi256_si128(_mm256_srli_epi64(val, 32))); + return (unsigned)a | ((uint64)(unsigned)b << 32); + #endif + } +}; + +struct v_int64x4 +{ + typedef int64 lane_type; + enum { nlanes = 4 }; + __m256i val; + + explicit v_int64x4(__m256i v) : val(v) {} + v_int64x4(int64 v0, int64 v1, int64 v2, int64 v3) + { val = _mm256_setr_epi64x(v0, v1, v2, v3); } + /* coverity[uninit_ctor]: suppress warning */ + v_int64x4() {} + + int64 get0() const + { + #if defined __x86_64__ || defined _M_X64 + return (int64)_mm_cvtsi128_si64(_mm256_castsi256_si128(val)); + #else + int a = _mm_cvtsi128_si32(_mm256_castsi256_si128(val)); + int b = _mm_cvtsi128_si32(_mm256_castsi256_si128(_mm256_srli_epi64(val, 32))); + return (int64)((unsigned)a | ((uint64)(unsigned)b << 32)); + #endif + } +}; + +struct v_float64x4 +{ + typedef double lane_type; + enum { nlanes = 4 }; + __m256d val; + + explicit v_float64x4(__m256d v) : val(v) {} + v_float64x4(double v0, double v1, double v2, double v3) + { val = _mm256_setr_pd(v0, v1, v2, v3); } + /* coverity[uninit_ctor]: suppress warning */ + v_float64x4() {} + + double get0() const { return _mm_cvtsd_f64(_mm256_castpd256_pd128(val)); } +}; + +//////////////// Load and store operations /////////////// + +#define OPENCV_HAL_IMPL_AVX_LOADSTORE(_Tpvec, _Tp) \ + inline _Tpvec v256_load(const _Tp* ptr) \ + { return _Tpvec(_mm256_loadu_si256((const __m256i*)ptr)); } \ + inline _Tpvec v256_load_aligned(const _Tp* ptr) \ + { return _Tpvec(_mm256_load_si256((const __m256i*)ptr)); } \ + inline _Tpvec v256_load_low(const _Tp* ptr) \ + { \ + __m128i v128 = _mm_loadu_si128((const __m128i*)ptr); \ + return _Tpvec(_mm256_castsi128_si256(v128)); \ + } \ + inline _Tpvec v256_load_halves(const _Tp* ptr0, const _Tp* ptr1) \ + { \ + __m128i vlo = _mm_loadu_si128((const __m128i*)ptr0); \ + __m128i vhi = _mm_loadu_si128((const __m128i*)ptr1); \ + return _Tpvec(_v256_combine(vlo, vhi)); \ + } \ + inline void v_store(_Tp* ptr, const _Tpvec& a) \ + { _mm256_storeu_si256((__m256i*)ptr, a.val); } \ + inline void v_store_aligned(_Tp* ptr, const _Tpvec& a) \ + { _mm256_store_si256((__m256i*)ptr, a.val); } \ + inline void v_store_aligned_nocache(_Tp* ptr, const _Tpvec& a) \ + { _mm256_stream_si256((__m256i*)ptr, a.val); } \ + inline void v_store(_Tp* ptr, const _Tpvec& a, hal::StoreMode mode) \ + { \ + if( mode == hal::STORE_UNALIGNED ) \ + _mm256_storeu_si256((__m256i*)ptr, a.val); \ + else if( mode == hal::STORE_ALIGNED_NOCACHE ) \ + _mm256_stream_si256((__m256i*)ptr, a.val); \ + else \ + _mm256_store_si256((__m256i*)ptr, a.val); \ + } \ + inline void v_store_low(_Tp* ptr, const _Tpvec& a) \ + { _mm_storeu_si128((__m128i*)ptr, _v256_extract_low(a.val)); } \ + inline void v_store_high(_Tp* ptr, const _Tpvec& a) \ + { _mm_storeu_si128((__m128i*)ptr, _v256_extract_high(a.val)); } + +OPENCV_HAL_IMPL_AVX_LOADSTORE(v_uint8x32, uchar) +OPENCV_HAL_IMPL_AVX_LOADSTORE(v_int8x32, schar) +OPENCV_HAL_IMPL_AVX_LOADSTORE(v_uint16x16, ushort) +OPENCV_HAL_IMPL_AVX_LOADSTORE(v_int16x16, short) +OPENCV_HAL_IMPL_AVX_LOADSTORE(v_uint32x8, unsigned) +OPENCV_HAL_IMPL_AVX_LOADSTORE(v_int32x8, int) +OPENCV_HAL_IMPL_AVX_LOADSTORE(v_uint64x4, uint64) +OPENCV_HAL_IMPL_AVX_LOADSTORE(v_int64x4, int64) + +#define OPENCV_HAL_IMPL_AVX_LOADSTORE_FLT(_Tpvec, _Tp, suffix, halfreg) \ + inline _Tpvec v256_load(const _Tp* ptr) \ + { return _Tpvec(_mm256_loadu_##suffix(ptr)); } \ + inline _Tpvec v256_load_aligned(const _Tp* ptr) \ + { return _Tpvec(_mm256_load_##suffix(ptr)); } \ + inline _Tpvec v256_load_low(const _Tp* ptr) \ + { \ + return _Tpvec(_mm256_cast##suffix##128_##suffix##256 \ + (_mm_loadu_##suffix(ptr))); \ + } \ + inline _Tpvec v256_load_halves(const _Tp* ptr0, const _Tp* ptr1) \ + { \ + halfreg vlo = _mm_loadu_##suffix(ptr0); \ + halfreg vhi = _mm_loadu_##suffix(ptr1); \ + return _Tpvec(_v256_combine(vlo, vhi)); \ + } \ + inline void v_store(_Tp* ptr, const _Tpvec& a) \ + { _mm256_storeu_##suffix(ptr, a.val); } \ + inline void v_store_aligned(_Tp* ptr, const _Tpvec& a) \ + { _mm256_store_##suffix(ptr, a.val); } \ + inline void v_store_aligned_nocache(_Tp* ptr, const _Tpvec& a) \ + { _mm256_stream_##suffix(ptr, a.val); } \ + inline void v_store(_Tp* ptr, const _Tpvec& a, hal::StoreMode mode) \ + { \ + if( mode == hal::STORE_UNALIGNED ) \ + _mm256_storeu_##suffix(ptr, a.val); \ + else if( mode == hal::STORE_ALIGNED_NOCACHE ) \ + _mm256_stream_##suffix(ptr, a.val); \ + else \ + _mm256_store_##suffix(ptr, a.val); \ + } \ + inline void v_store_low(_Tp* ptr, const _Tpvec& a) \ + { _mm_storeu_##suffix(ptr, _v256_extract_low(a.val)); } \ + inline void v_store_high(_Tp* ptr, const _Tpvec& a) \ + { _mm_storeu_##suffix(ptr, _v256_extract_high(a.val)); } + +OPENCV_HAL_IMPL_AVX_LOADSTORE_FLT(v_float32x8, float, ps, __m128) +OPENCV_HAL_IMPL_AVX_LOADSTORE_FLT(v_float64x4, double, pd, __m128d) + +#define OPENCV_HAL_IMPL_AVX_CAST(_Tpvec, _Tpvecf, suffix, cast) \ + inline _Tpvec v_reinterpret_as_##suffix(const _Tpvecf& a) \ + { return _Tpvec(cast(a.val)); } + +#define OPENCV_HAL_IMPL_AVX_INIT(_Tpvec, _Tp, suffix, ssuffix, ctype_s) \ + inline _Tpvec v256_setzero_##suffix() \ + { return _Tpvec(_mm256_setzero_si256()); } \ + inline _Tpvec v256_setall_##suffix(_Tp v) \ + { return _Tpvec(_mm256_set1_##ssuffix((ctype_s)v)); } \ + OPENCV_HAL_IMPL_AVX_CAST(_Tpvec, v_uint8x32, suffix, OPENCV_HAL_NOP) \ + OPENCV_HAL_IMPL_AVX_CAST(_Tpvec, v_int8x32, suffix, OPENCV_HAL_NOP) \ + OPENCV_HAL_IMPL_AVX_CAST(_Tpvec, v_uint16x16, suffix, OPENCV_HAL_NOP) \ + OPENCV_HAL_IMPL_AVX_CAST(_Tpvec, v_int16x16, suffix, OPENCV_HAL_NOP) \ + OPENCV_HAL_IMPL_AVX_CAST(_Tpvec, v_uint32x8, suffix, OPENCV_HAL_NOP) \ + OPENCV_HAL_IMPL_AVX_CAST(_Tpvec, v_int32x8, suffix, OPENCV_HAL_NOP) \ + OPENCV_HAL_IMPL_AVX_CAST(_Tpvec, v_uint64x4, suffix, OPENCV_HAL_NOP) \ + OPENCV_HAL_IMPL_AVX_CAST(_Tpvec, v_int64x4, suffix, OPENCV_HAL_NOP) \ + OPENCV_HAL_IMPL_AVX_CAST(_Tpvec, v_float32x8, suffix, _mm256_castps_si256) \ + OPENCV_HAL_IMPL_AVX_CAST(_Tpvec, v_float64x4, suffix, _mm256_castpd_si256) + +OPENCV_HAL_IMPL_AVX_INIT(v_uint8x32, uchar, u8, epi8, char) +OPENCV_HAL_IMPL_AVX_INIT(v_int8x32, schar, s8, epi8, char) +OPENCV_HAL_IMPL_AVX_INIT(v_uint16x16, ushort, u16, epi16, short) +OPENCV_HAL_IMPL_AVX_INIT(v_int16x16, short, s16, epi16, short) +OPENCV_HAL_IMPL_AVX_INIT(v_uint32x8, unsigned, u32, epi32, int) +OPENCV_HAL_IMPL_AVX_INIT(v_int32x8, int, s32, epi32, int) +OPENCV_HAL_IMPL_AVX_INIT(v_uint64x4, uint64, u64, epi64x, int64) +OPENCV_HAL_IMPL_AVX_INIT(v_int64x4, int64, s64, epi64x, int64) + +#define OPENCV_HAL_IMPL_AVX_INIT_FLT(_Tpvec, _Tp, suffix, zsuffix, cast) \ + inline _Tpvec v256_setzero_##suffix() \ + { return _Tpvec(_mm256_setzero_##zsuffix()); } \ + inline _Tpvec v256_setall_##suffix(_Tp v) \ + { return _Tpvec(_mm256_set1_##zsuffix(v)); } \ + OPENCV_HAL_IMPL_AVX_CAST(_Tpvec, v_uint8x32, suffix, cast) \ + OPENCV_HAL_IMPL_AVX_CAST(_Tpvec, v_int8x32, suffix, cast) \ + OPENCV_HAL_IMPL_AVX_CAST(_Tpvec, v_uint16x16, suffix, cast) \ + OPENCV_HAL_IMPL_AVX_CAST(_Tpvec, v_int16x16, suffix, cast) \ + OPENCV_HAL_IMPL_AVX_CAST(_Tpvec, v_uint32x8, suffix, cast) \ + OPENCV_HAL_IMPL_AVX_CAST(_Tpvec, v_int32x8, suffix, cast) \ + OPENCV_HAL_IMPL_AVX_CAST(_Tpvec, v_uint64x4, suffix, cast) \ + OPENCV_HAL_IMPL_AVX_CAST(_Tpvec, v_int64x4, suffix, cast) + +OPENCV_HAL_IMPL_AVX_INIT_FLT(v_float32x8, float, f32, ps, _mm256_castsi256_ps) +OPENCV_HAL_IMPL_AVX_INIT_FLT(v_float64x4, double, f64, pd, _mm256_castsi256_pd) + +inline v_float32x8 v_reinterpret_as_f32(const v_float32x8& a) +{ return a; } +inline v_float32x8 v_reinterpret_as_f32(const v_float64x4& a) +{ return v_float32x8(_mm256_castpd_ps(a.val)); } + +inline v_float64x4 v_reinterpret_as_f64(const v_float64x4& a) +{ return a; } +inline v_float64x4 v_reinterpret_as_f64(const v_float32x8& a) +{ return v_float64x4(_mm256_castps_pd(a.val)); } + +/* Recombine */ +/*#define OPENCV_HAL_IMPL_AVX_COMBINE(_Tpvec, perm) \ + inline _Tpvec v_combine_low(const _Tpvec& a, const _Tpvec& b) \ + { return _Tpvec(perm(a.val, b.val, 0x20)); } \ + inline _Tpvec v_combine_high(const _Tpvec& a, const _Tpvec& b) \ + { return _Tpvec(perm(a.val, b.val, 0x31)); } \ + inline void v_recombine(const _Tpvec& a, const _Tpvec& b, \ + _Tpvec& c, _Tpvec& d) \ + { c = v_combine_low(a, b); d = v_combine_high(a, b); } + +#define OPENCV_HAL_IMPL_AVX_UNPACKS(_Tpvec, suffix) \ + OPENCV_HAL_IMPL_AVX_COMBINE(_Tpvec, _mm256_permute2x128_si256) \ + inline void v_zip(const _Tpvec& a0, const _Tpvec& a1, \ + _Tpvec& b0, _Tpvec& b1) \ + { \ + __m256i v0 = _v256_shuffle_odd_64(a0.val); \ + __m256i v1 = _v256_shuffle_odd_64(a1.val); \ + b0.val = _mm256_unpacklo_##suffix(v0, v1); \ + b1.val = _mm256_unpackhi_##suffix(v0, v1); \ + } + +OPENCV_HAL_IMPL_AVX_UNPACKS(v_uint8x32, epi8) +OPENCV_HAL_IMPL_AVX_UNPACKS(v_int8x32, epi8) +OPENCV_HAL_IMPL_AVX_UNPACKS(v_uint16x16, epi16) +OPENCV_HAL_IMPL_AVX_UNPACKS(v_int16x16, epi16) +OPENCV_HAL_IMPL_AVX_UNPACKS(v_uint32x8, epi32) +OPENCV_HAL_IMPL_AVX_UNPACKS(v_int32x8, epi32) +OPENCV_HAL_IMPL_AVX_UNPACKS(v_uint64x4, epi64) +OPENCV_HAL_IMPL_AVX_UNPACKS(v_int64x4, epi64) +OPENCV_HAL_IMPL_AVX_COMBINE(v_float32x8, _mm256_permute2f128_ps) +OPENCV_HAL_IMPL_AVX_COMBINE(v_float64x4, _mm256_permute2f128_pd) + +inline void v_zip(const v_float32x8& a0, const v_float32x8& a1, v_float32x8& b0, v_float32x8& b1) +{ + __m256 v0 = _mm256_unpacklo_ps(a0.val, a1.val); + __m256 v1 = _mm256_unpackhi_ps(a0.val, a1.val); + v_recombine(v_float32x8(v0), v_float32x8(v1), b0, b1); +} + +inline void v_zip(const v_float64x4& a0, const v_float64x4& a1, v_float64x4& b0, v_float64x4& b1) +{ + __m256d v0 = _v_shuffle_odd_64(a0.val); + __m256d v1 = _v_shuffle_odd_64(a1.val); + b0.val = _mm256_unpacklo_pd(v0, v1); + b1.val = _mm256_unpackhi_pd(v0, v1); +}*/ + +//////////////// Variant Value reordering /////////////// + +// unpacks +#define OPENCV_HAL_IMPL_AVX_UNPACK(_Tpvec, suffix) \ + inline _Tpvec v256_unpacklo(const _Tpvec& a, const _Tpvec& b) \ + { return _Tpvec(_mm256_unpacklo_##suffix(a.val, b.val)); } \ + inline _Tpvec v256_unpackhi(const _Tpvec& a, const _Tpvec& b) \ + { return _Tpvec(_mm256_unpackhi_##suffix(a.val, b.val)); } + +OPENCV_HAL_IMPL_AVX_UNPACK(v_uint8x32, epi8) +OPENCV_HAL_IMPL_AVX_UNPACK(v_int8x32, epi8) +OPENCV_HAL_IMPL_AVX_UNPACK(v_uint16x16, epi16) +OPENCV_HAL_IMPL_AVX_UNPACK(v_int16x16, epi16) +OPENCV_HAL_IMPL_AVX_UNPACK(v_uint32x8, epi32) +OPENCV_HAL_IMPL_AVX_UNPACK(v_int32x8, epi32) +OPENCV_HAL_IMPL_AVX_UNPACK(v_uint64x4, epi64) +OPENCV_HAL_IMPL_AVX_UNPACK(v_int64x4, epi64) +OPENCV_HAL_IMPL_AVX_UNPACK(v_float32x8, ps) +OPENCV_HAL_IMPL_AVX_UNPACK(v_float64x4, pd) + +// blend +#define OPENCV_HAL_IMPL_AVX_BLEND(_Tpvec, suffix) \ + template \ + inline _Tpvec v256_blend(const _Tpvec& a, const _Tpvec& b) \ + { return _Tpvec(_mm256_blend_##suffix(a.val, b.val, m)); } + +OPENCV_HAL_IMPL_AVX_BLEND(v_uint16x16, epi16) +OPENCV_HAL_IMPL_AVX_BLEND(v_int16x16, epi16) +OPENCV_HAL_IMPL_AVX_BLEND(v_uint32x8, epi32) +OPENCV_HAL_IMPL_AVX_BLEND(v_int32x8, epi32) +OPENCV_HAL_IMPL_AVX_BLEND(v_float32x8, ps) +OPENCV_HAL_IMPL_AVX_BLEND(v_float64x4, pd) + +template +inline v_uint64x4 v256_blend(const v_uint64x4& a, const v_uint64x4& b) +{ + enum {M0 = m}; + enum {M1 = (M0 | (M0 << 2)) & 0x33}; + enum {M2 = (M1 | (M1 << 1)) & 0x55}; + enum {MM = M2 | (M2 << 1)}; + return v_uint64x4(_mm256_blend_epi32(a.val, b.val, MM)); +} +template +inline v_int64x4 v256_blend(const v_int64x4& a, const v_int64x4& b) +{ return v_int64x4(v256_blend(v_uint64x4(a.val), v_uint64x4(b.val)).val); } + +// shuffle +// todo: emulate 64bit +#define OPENCV_HAL_IMPL_AVX_SHUFFLE(_Tpvec, intrin) \ + template \ + inline _Tpvec v256_shuffle(const _Tpvec& a) \ + { return _Tpvec(_mm256_##intrin(a.val, m)); } + +OPENCV_HAL_IMPL_AVX_SHUFFLE(v_uint32x8, shuffle_epi32) +OPENCV_HAL_IMPL_AVX_SHUFFLE(v_int32x8, shuffle_epi32) +OPENCV_HAL_IMPL_AVX_SHUFFLE(v_float32x8, permute_ps) +OPENCV_HAL_IMPL_AVX_SHUFFLE(v_float64x4, permute_pd) + +template +inline void v256_zip(const _Tpvec& a, const _Tpvec& b, _Tpvec& ab0, _Tpvec& ab1) +{ + ab0 = v256_unpacklo(a, b); + ab1 = v256_unpackhi(a, b); +} + +template +inline _Tpvec v256_combine_diagonal(const _Tpvec& a, const _Tpvec& b) +{ return _Tpvec(_mm256_blend_epi32(a.val, b.val, 0xf0)); } + +inline v_float32x8 v256_combine_diagonal(const v_float32x8& a, const v_float32x8& b) +{ return v256_blend<0xf0>(a, b); } + +inline v_float64x4 v256_combine_diagonal(const v_float64x4& a, const v_float64x4& b) +{ return v256_blend<0xc>(a, b); } + +template +inline _Tpvec v256_alignr_128(const _Tpvec& a, const _Tpvec& b) +{ return v256_permute2x128<0x21>(a, b); } + +template +inline _Tpvec v256_alignr_64(const _Tpvec& a, const _Tpvec& b) +{ return _Tpvec(_mm256_alignr_epi8(a.val, b.val, 8)); } +inline v_float64x4 v256_alignr_64(const v_float64x4& a, const v_float64x4& b) +{ return v_float64x4(_mm256_shuffle_pd(b.val, a.val, _MM_SHUFFLE(0, 0, 1, 1))); } +// todo: emulate float32 + +template +inline _Tpvec v256_swap_halves(const _Tpvec& a) +{ return v256_permute2x128<1>(a, a); } + +template +inline _Tpvec v256_reverse_64(const _Tpvec& a) +{ return v256_permute4x64<_MM_SHUFFLE(0, 1, 2, 3)>(a); } + +// ZIP +#define OPENCV_HAL_IMPL_AVX_ZIP(_Tpvec) \ + inline _Tpvec v_combine_low(const _Tpvec& a, const _Tpvec& b) \ + { return v256_permute2x128<0x20>(a, b); } \ + inline _Tpvec v_combine_high(const _Tpvec& a, const _Tpvec& b) \ + { return v256_permute2x128<0x31>(a, b); } \ + inline void v_recombine(const _Tpvec& a, const _Tpvec& b, \ + _Tpvec& c, _Tpvec& d) \ + { \ + _Tpvec a1b0 = v256_alignr_128(a, b); \ + c = v256_combine_diagonal(a, a1b0); \ + d = v256_combine_diagonal(a1b0, b); \ + } \ + inline void v_zip(const _Tpvec& a, const _Tpvec& b, \ + _Tpvec& ab0, _Tpvec& ab1) \ + { \ + _Tpvec ab0ab2, ab1ab3; \ + v256_zip(a, b, ab0ab2, ab1ab3); \ + v_recombine(ab0ab2, ab1ab3, ab0, ab1); \ + } + +OPENCV_HAL_IMPL_AVX_ZIP(v_uint8x32) +OPENCV_HAL_IMPL_AVX_ZIP(v_int8x32) +OPENCV_HAL_IMPL_AVX_ZIP(v_uint16x16) +OPENCV_HAL_IMPL_AVX_ZIP(v_int16x16) +OPENCV_HAL_IMPL_AVX_ZIP(v_uint32x8) +OPENCV_HAL_IMPL_AVX_ZIP(v_int32x8) +OPENCV_HAL_IMPL_AVX_ZIP(v_uint64x4) +OPENCV_HAL_IMPL_AVX_ZIP(v_int64x4) +OPENCV_HAL_IMPL_AVX_ZIP(v_float32x8) +OPENCV_HAL_IMPL_AVX_ZIP(v_float64x4) + +////////// Arithmetic, bitwise and comparison operations ///////// + +/* Element-wise binary and unary operations */ + +/** Arithmetics **/ +#define OPENCV_HAL_IMPL_AVX_BIN_OP(bin_op, _Tpvec, intrin) \ + inline _Tpvec operator bin_op (const _Tpvec& a, const _Tpvec& b) \ + { return _Tpvec(intrin(a.val, b.val)); } \ + inline _Tpvec& operator bin_op##= (_Tpvec& a, const _Tpvec& b) \ + { a.val = intrin(a.val, b.val); return a; } + +OPENCV_HAL_IMPL_AVX_BIN_OP(+, v_uint8x32, _mm256_adds_epu8) +OPENCV_HAL_IMPL_AVX_BIN_OP(-, v_uint8x32, _mm256_subs_epu8) +OPENCV_HAL_IMPL_AVX_BIN_OP(+, v_int8x32, _mm256_adds_epi8) +OPENCV_HAL_IMPL_AVX_BIN_OP(-, v_int8x32, _mm256_subs_epi8) +OPENCV_HAL_IMPL_AVX_BIN_OP(+, v_uint16x16, _mm256_adds_epu16) +OPENCV_HAL_IMPL_AVX_BIN_OP(-, v_uint16x16, _mm256_subs_epu16) +OPENCV_HAL_IMPL_AVX_BIN_OP(+, v_int16x16, _mm256_adds_epi16) +OPENCV_HAL_IMPL_AVX_BIN_OP(-, v_int16x16, _mm256_subs_epi16) +OPENCV_HAL_IMPL_AVX_BIN_OP(+, v_uint32x8, _mm256_add_epi32) +OPENCV_HAL_IMPL_AVX_BIN_OP(-, v_uint32x8, _mm256_sub_epi32) +OPENCV_HAL_IMPL_AVX_BIN_OP(*, v_uint32x8, _mm256_mullo_epi32) +OPENCV_HAL_IMPL_AVX_BIN_OP(+, v_int32x8, _mm256_add_epi32) +OPENCV_HAL_IMPL_AVX_BIN_OP(-, v_int32x8, _mm256_sub_epi32) +OPENCV_HAL_IMPL_AVX_BIN_OP(*, v_int32x8, _mm256_mullo_epi32) +OPENCV_HAL_IMPL_AVX_BIN_OP(+, v_uint64x4, _mm256_add_epi64) +OPENCV_HAL_IMPL_AVX_BIN_OP(-, v_uint64x4, _mm256_sub_epi64) +OPENCV_HAL_IMPL_AVX_BIN_OP(+, v_int64x4, _mm256_add_epi64) +OPENCV_HAL_IMPL_AVX_BIN_OP(-, v_int64x4, _mm256_sub_epi64) + +OPENCV_HAL_IMPL_AVX_BIN_OP(+, v_float32x8, _mm256_add_ps) +OPENCV_HAL_IMPL_AVX_BIN_OP(-, v_float32x8, _mm256_sub_ps) +OPENCV_HAL_IMPL_AVX_BIN_OP(*, v_float32x8, _mm256_mul_ps) +OPENCV_HAL_IMPL_AVX_BIN_OP(/, v_float32x8, _mm256_div_ps) +OPENCV_HAL_IMPL_AVX_BIN_OP(+, v_float64x4, _mm256_add_pd) +OPENCV_HAL_IMPL_AVX_BIN_OP(-, v_float64x4, _mm256_sub_pd) +OPENCV_HAL_IMPL_AVX_BIN_OP(*, v_float64x4, _mm256_mul_pd) +OPENCV_HAL_IMPL_AVX_BIN_OP(/, v_float64x4, _mm256_div_pd) + +// saturating multiply 8-bit, 16-bit +inline v_uint8x32 operator * (const v_uint8x32& a, const v_uint8x32& b) +{ + v_uint16x16 c, d; + v_mul_expand(a, b, c, d); + return v_pack(c, d); +} +inline v_int8x32 operator * (const v_int8x32& a, const v_int8x32& b) +{ + v_int16x16 c, d; + v_mul_expand(a, b, c, d); + return v_pack(c, d); +} +inline v_uint16x16 operator * (const v_uint16x16& a, const v_uint16x16& b) +{ + __m256i pl = _mm256_mullo_epi16(a.val, b.val); + __m256i ph = _mm256_mulhi_epu16(a.val, b.val); + __m256i p0 = _mm256_unpacklo_epi16(pl, ph); + __m256i p1 = _mm256_unpackhi_epi16(pl, ph); + return v_uint16x16(_v256_packs_epu32(p0, p1)); +} +inline v_int16x16 operator * (const v_int16x16& a, const v_int16x16& b) +{ + __m256i pl = _mm256_mullo_epi16(a.val, b.val); + __m256i ph = _mm256_mulhi_epi16(a.val, b.val); + __m256i p0 = _mm256_unpacklo_epi16(pl, ph); + __m256i p1 = _mm256_unpackhi_epi16(pl, ph); + return v_int16x16(_mm256_packs_epi32(p0, p1)); +} +inline v_uint8x32& operator *= (v_uint8x32& a, const v_uint8x32& b) +{ a = a * b; return a; } +inline v_int8x32& operator *= (v_int8x32& a, const v_int8x32& b) +{ a = a * b; return a; } +inline v_uint16x16& operator *= (v_uint16x16& a, const v_uint16x16& b) +{ a = a * b; return a; } +inline v_int16x16& operator *= (v_int16x16& a, const v_int16x16& b) +{ a = a * b; return a; } + +/** Non-saturating arithmetics **/ +#define OPENCV_HAL_IMPL_AVX_BIN_FUNC(func, _Tpvec, intrin) \ + inline _Tpvec func(const _Tpvec& a, const _Tpvec& b) \ + { return _Tpvec(intrin(a.val, b.val)); } + +OPENCV_HAL_IMPL_AVX_BIN_FUNC(v_add_wrap, v_uint8x32, _mm256_add_epi8) +OPENCV_HAL_IMPL_AVX_BIN_FUNC(v_add_wrap, v_int8x32, _mm256_add_epi8) +OPENCV_HAL_IMPL_AVX_BIN_FUNC(v_add_wrap, v_uint16x16, _mm256_add_epi16) +OPENCV_HAL_IMPL_AVX_BIN_FUNC(v_add_wrap, v_int16x16, _mm256_add_epi16) +OPENCV_HAL_IMPL_AVX_BIN_FUNC(v_sub_wrap, v_uint8x32, _mm256_sub_epi8) +OPENCV_HAL_IMPL_AVX_BIN_FUNC(v_sub_wrap, v_int8x32, _mm256_sub_epi8) +OPENCV_HAL_IMPL_AVX_BIN_FUNC(v_sub_wrap, v_uint16x16, _mm256_sub_epi16) +OPENCV_HAL_IMPL_AVX_BIN_FUNC(v_sub_wrap, v_int16x16, _mm256_sub_epi16) +OPENCV_HAL_IMPL_AVX_BIN_FUNC(v_mul_wrap, v_uint16x16, _mm256_mullo_epi16) +OPENCV_HAL_IMPL_AVX_BIN_FUNC(v_mul_wrap, v_int16x16, _mm256_mullo_epi16) + +inline v_uint8x32 v_mul_wrap(const v_uint8x32& a, const v_uint8x32& b) +{ + __m256i ad = _mm256_srai_epi16(a.val, 8); + __m256i bd = _mm256_srai_epi16(b.val, 8); + __m256i p0 = _mm256_mullo_epi16(a.val, b.val); // even + __m256i p1 = _mm256_slli_epi16(_mm256_mullo_epi16(ad, bd), 8); // odd + + const __m256i b01 = _mm256_set1_epi32(0xFF00FF00); + return v_uint8x32(_mm256_blendv_epi8(p0, p1, b01)); +} +inline v_int8x32 v_mul_wrap(const v_int8x32& a, const v_int8x32& b) +{ + return v_reinterpret_as_s8(v_mul_wrap(v_reinterpret_as_u8(a), v_reinterpret_as_u8(b))); +} + +// Multiply and expand +inline void v_mul_expand(const v_uint8x32& a, const v_uint8x32& b, + v_uint16x16& c, v_uint16x16& d) +{ + v_uint16x16 a0, a1, b0, b1; + v_expand(a, a0, a1); + v_expand(b, b0, b1); + c = v_mul_wrap(a0, b0); + d = v_mul_wrap(a1, b1); +} + +inline void v_mul_expand(const v_int8x32& a, const v_int8x32& b, + v_int16x16& c, v_int16x16& d) +{ + v_int16x16 a0, a1, b0, b1; + v_expand(a, a0, a1); + v_expand(b, b0, b1); + c = v_mul_wrap(a0, b0); + d = v_mul_wrap(a1, b1); +} + +inline void v_mul_expand(const v_int16x16& a, const v_int16x16& b, + v_int32x8& c, v_int32x8& d) +{ + v_int16x16 vhi = v_int16x16(_mm256_mulhi_epi16(a.val, b.val)); + + v_int16x16 v0, v1; + v_zip(v_mul_wrap(a, b), vhi, v0, v1); + + c = v_reinterpret_as_s32(v0); + d = v_reinterpret_as_s32(v1); +} + +inline void v_mul_expand(const v_uint16x16& a, const v_uint16x16& b, + v_uint32x8& c, v_uint32x8& d) +{ + v_uint16x16 vhi = v_uint16x16(_mm256_mulhi_epu16(a.val, b.val)); + + v_uint16x16 v0, v1; + v_zip(v_mul_wrap(a, b), vhi, v0, v1); + + c = v_reinterpret_as_u32(v0); + d = v_reinterpret_as_u32(v1); +} + +inline void v_mul_expand(const v_uint32x8& a, const v_uint32x8& b, + v_uint64x4& c, v_uint64x4& d) +{ + __m256i v0 = _mm256_mul_epu32(a.val, b.val); + __m256i v1 = _mm256_mul_epu32(_mm256_srli_epi64(a.val, 32), _mm256_srli_epi64(b.val, 32)); + v_zip(v_uint64x4(v0), v_uint64x4(v1), c, d); +} + +inline v_int16x16 v_mul_hi(const v_int16x16& a, const v_int16x16& b) { return v_int16x16(_mm256_mulhi_epi16(a.val, b.val)); } +inline v_uint16x16 v_mul_hi(const v_uint16x16& a, const v_uint16x16& b) { return v_uint16x16(_mm256_mulhi_epu16(a.val, b.val)); } + +/** Bitwise shifts **/ +#define OPENCV_HAL_IMPL_AVX_SHIFT_OP(_Tpuvec, _Tpsvec, suffix, srai) \ + inline _Tpuvec operator << (const _Tpuvec& a, int imm) \ + { return _Tpuvec(_mm256_slli_##suffix(a.val, imm)); } \ + inline _Tpsvec operator << (const _Tpsvec& a, int imm) \ + { return _Tpsvec(_mm256_slli_##suffix(a.val, imm)); } \ + inline _Tpuvec operator >> (const _Tpuvec& a, int imm) \ + { return _Tpuvec(_mm256_srli_##suffix(a.val, imm)); } \ + inline _Tpsvec operator >> (const _Tpsvec& a, int imm) \ + { return _Tpsvec(srai(a.val, imm)); } \ + template \ + inline _Tpuvec v_shl(const _Tpuvec& a) \ + { return _Tpuvec(_mm256_slli_##suffix(a.val, imm)); } \ + template \ + inline _Tpsvec v_shl(const _Tpsvec& a) \ + { return _Tpsvec(_mm256_slli_##suffix(a.val, imm)); } \ + template \ + inline _Tpuvec v_shr(const _Tpuvec& a) \ + { return _Tpuvec(_mm256_srli_##suffix(a.val, imm)); } \ + template \ + inline _Tpsvec v_shr(const _Tpsvec& a) \ + { return _Tpsvec(srai(a.val, imm)); } + +OPENCV_HAL_IMPL_AVX_SHIFT_OP(v_uint16x16, v_int16x16, epi16, _mm256_srai_epi16) +OPENCV_HAL_IMPL_AVX_SHIFT_OP(v_uint32x8, v_int32x8, epi32, _mm256_srai_epi32) + +inline __m256i _mm256_srai_epi64xx(const __m256i a, int imm) +{ + __m256i d = _mm256_set1_epi64x((int64)1 << 63); + __m256i r = _mm256_srli_epi64(_mm256_add_epi64(a, d), imm); + return _mm256_sub_epi64(r, _mm256_srli_epi64(d, imm)); +} +OPENCV_HAL_IMPL_AVX_SHIFT_OP(v_uint64x4, v_int64x4, epi64, _mm256_srai_epi64xx) + + +/** Bitwise logic **/ +#define OPENCV_HAL_IMPL_AVX_LOGIC_OP(_Tpvec, suffix, not_const) \ + OPENCV_HAL_IMPL_AVX_BIN_OP(&, _Tpvec, _mm256_and_##suffix) \ + OPENCV_HAL_IMPL_AVX_BIN_OP(|, _Tpvec, _mm256_or_##suffix) \ + OPENCV_HAL_IMPL_AVX_BIN_OP(^, _Tpvec, _mm256_xor_##suffix) \ + inline _Tpvec operator ~ (const _Tpvec& a) \ + { return _Tpvec(_mm256_xor_##suffix(a.val, not_const)); } + +OPENCV_HAL_IMPL_AVX_LOGIC_OP(v_uint8x32, si256, _mm256_set1_epi32(-1)) +OPENCV_HAL_IMPL_AVX_LOGIC_OP(v_int8x32, si256, _mm256_set1_epi32(-1)) +OPENCV_HAL_IMPL_AVX_LOGIC_OP(v_uint16x16, si256, _mm256_set1_epi32(-1)) +OPENCV_HAL_IMPL_AVX_LOGIC_OP(v_int16x16, si256, _mm256_set1_epi32(-1)) +OPENCV_HAL_IMPL_AVX_LOGIC_OP(v_uint32x8, si256, _mm256_set1_epi32(-1)) +OPENCV_HAL_IMPL_AVX_LOGIC_OP(v_int32x8, si256, _mm256_set1_epi32(-1)) +OPENCV_HAL_IMPL_AVX_LOGIC_OP(v_uint64x4, si256, _mm256_set1_epi64x(-1)) +OPENCV_HAL_IMPL_AVX_LOGIC_OP(v_int64x4, si256, _mm256_set1_epi64x(-1)) +OPENCV_HAL_IMPL_AVX_LOGIC_OP(v_float32x8, ps, _mm256_castsi256_ps(_mm256_set1_epi32(-1))) +OPENCV_HAL_IMPL_AVX_LOGIC_OP(v_float64x4, pd, _mm256_castsi256_pd(_mm256_set1_epi32(-1))) + +/** Select **/ +#define OPENCV_HAL_IMPL_AVX_SELECT(_Tpvec, suffix) \ + inline _Tpvec v_select(const _Tpvec& mask, const _Tpvec& a, const _Tpvec& b) \ + { return _Tpvec(_mm256_blendv_##suffix(b.val, a.val, mask.val)); } + +OPENCV_HAL_IMPL_AVX_SELECT(v_uint8x32, epi8) +OPENCV_HAL_IMPL_AVX_SELECT(v_int8x32, epi8) +OPENCV_HAL_IMPL_AVX_SELECT(v_uint16x16, epi8) +OPENCV_HAL_IMPL_AVX_SELECT(v_int16x16, epi8) +OPENCV_HAL_IMPL_AVX_SELECT(v_uint32x8, epi8) +OPENCV_HAL_IMPL_AVX_SELECT(v_int32x8, epi8) +OPENCV_HAL_IMPL_AVX_SELECT(v_float32x8, ps) +OPENCV_HAL_IMPL_AVX_SELECT(v_float64x4, pd) + +/** Comparison **/ +#define OPENCV_HAL_IMPL_AVX_CMP_OP_OV(_Tpvec) \ + inline _Tpvec operator != (const _Tpvec& a, const _Tpvec& b) \ + { return ~(a == b); } \ + inline _Tpvec operator < (const _Tpvec& a, const _Tpvec& b) \ + { return b > a; } \ + inline _Tpvec operator >= (const _Tpvec& a, const _Tpvec& b) \ + { return ~(a < b); } \ + inline _Tpvec operator <= (const _Tpvec& a, const _Tpvec& b) \ + { return b >= a; } + +#define OPENCV_HAL_IMPL_AVX_CMP_OP_INT(_Tpuvec, _Tpsvec, suffix, sbit) \ + inline _Tpuvec operator == (const _Tpuvec& a, const _Tpuvec& b) \ + { return _Tpuvec(_mm256_cmpeq_##suffix(a.val, b.val)); } \ + inline _Tpuvec operator > (const _Tpuvec& a, const _Tpuvec& b) \ + { \ + __m256i smask = _mm256_set1_##suffix(sbit); \ + return _Tpuvec(_mm256_cmpgt_##suffix( \ + _mm256_xor_si256(a.val, smask), \ + _mm256_xor_si256(b.val, smask))); \ + } \ + inline _Tpsvec operator == (const _Tpsvec& a, const _Tpsvec& b) \ + { return _Tpsvec(_mm256_cmpeq_##suffix(a.val, b.val)); } \ + inline _Tpsvec operator > (const _Tpsvec& a, const _Tpsvec& b) \ + { return _Tpsvec(_mm256_cmpgt_##suffix(a.val, b.val)); } \ + OPENCV_HAL_IMPL_AVX_CMP_OP_OV(_Tpuvec) \ + OPENCV_HAL_IMPL_AVX_CMP_OP_OV(_Tpsvec) + +OPENCV_HAL_IMPL_AVX_CMP_OP_INT(v_uint8x32, v_int8x32, epi8, (char)-128) +OPENCV_HAL_IMPL_AVX_CMP_OP_INT(v_uint16x16, v_int16x16, epi16, (short)-32768) +OPENCV_HAL_IMPL_AVX_CMP_OP_INT(v_uint32x8, v_int32x8, epi32, (int)0x80000000) + +#define OPENCV_HAL_IMPL_AVX_CMP_OP_64BIT(_Tpvec) \ + inline _Tpvec operator == (const _Tpvec& a, const _Tpvec& b) \ + { return _Tpvec(_mm256_cmpeq_epi64(a.val, b.val)); } \ + inline _Tpvec operator != (const _Tpvec& a, const _Tpvec& b) \ + { return ~(a == b); } + +OPENCV_HAL_IMPL_AVX_CMP_OP_64BIT(v_uint64x4) +OPENCV_HAL_IMPL_AVX_CMP_OP_64BIT(v_int64x4) + +#define OPENCV_HAL_IMPL_AVX_CMP_FLT(bin_op, imm8, _Tpvec, suffix) \ + inline _Tpvec operator bin_op (const _Tpvec& a, const _Tpvec& b) \ + { return _Tpvec(_mm256_cmp_##suffix(a.val, b.val, imm8)); } + +#define OPENCV_HAL_IMPL_AVX_CMP_OP_FLT(_Tpvec, suffix) \ + OPENCV_HAL_IMPL_AVX_CMP_FLT(==, _CMP_EQ_OQ, _Tpvec, suffix) \ + OPENCV_HAL_IMPL_AVX_CMP_FLT(!=, _CMP_NEQ_OQ, _Tpvec, suffix) \ + OPENCV_HAL_IMPL_AVX_CMP_FLT(<, _CMP_LT_OQ, _Tpvec, suffix) \ + OPENCV_HAL_IMPL_AVX_CMP_FLT(>, _CMP_GT_OQ, _Tpvec, suffix) \ + OPENCV_HAL_IMPL_AVX_CMP_FLT(<=, _CMP_LE_OQ, _Tpvec, suffix) \ + OPENCV_HAL_IMPL_AVX_CMP_FLT(>=, _CMP_GE_OQ, _Tpvec, suffix) + +OPENCV_HAL_IMPL_AVX_CMP_OP_FLT(v_float32x8, ps) +OPENCV_HAL_IMPL_AVX_CMP_OP_FLT(v_float64x4, pd) + +inline v_float32x8 v_not_nan(const v_float32x8& a) +{ return v_float32x8(_mm256_cmp_ps(a.val, a.val, _CMP_ORD_Q)); } +inline v_float64x4 v_not_nan(const v_float64x4& a) +{ return v_float64x4(_mm256_cmp_pd(a.val, a.val, _CMP_ORD_Q)); } + +/** min/max **/ +OPENCV_HAL_IMPL_AVX_BIN_FUNC(v_min, v_uint8x32, _mm256_min_epu8) +OPENCV_HAL_IMPL_AVX_BIN_FUNC(v_max, v_uint8x32, _mm256_max_epu8) +OPENCV_HAL_IMPL_AVX_BIN_FUNC(v_min, v_int8x32, _mm256_min_epi8) +OPENCV_HAL_IMPL_AVX_BIN_FUNC(v_max, v_int8x32, _mm256_max_epi8) +OPENCV_HAL_IMPL_AVX_BIN_FUNC(v_min, v_uint16x16, _mm256_min_epu16) +OPENCV_HAL_IMPL_AVX_BIN_FUNC(v_max, v_uint16x16, _mm256_max_epu16) +OPENCV_HAL_IMPL_AVX_BIN_FUNC(v_min, v_int16x16, _mm256_min_epi16) +OPENCV_HAL_IMPL_AVX_BIN_FUNC(v_max, v_int16x16, _mm256_max_epi16) +OPENCV_HAL_IMPL_AVX_BIN_FUNC(v_min, v_uint32x8, _mm256_min_epu32) +OPENCV_HAL_IMPL_AVX_BIN_FUNC(v_max, v_uint32x8, _mm256_max_epu32) +OPENCV_HAL_IMPL_AVX_BIN_FUNC(v_min, v_int32x8, _mm256_min_epi32) +OPENCV_HAL_IMPL_AVX_BIN_FUNC(v_max, v_int32x8, _mm256_max_epi32) +OPENCV_HAL_IMPL_AVX_BIN_FUNC(v_min, v_float32x8, _mm256_min_ps) +OPENCV_HAL_IMPL_AVX_BIN_FUNC(v_max, v_float32x8, _mm256_max_ps) +OPENCV_HAL_IMPL_AVX_BIN_FUNC(v_min, v_float64x4, _mm256_min_pd) +OPENCV_HAL_IMPL_AVX_BIN_FUNC(v_max, v_float64x4, _mm256_max_pd) + +/** Rotate **/ +template +inline v_uint8x32 v_rotate_left(const v_uint8x32& a, const v_uint8x32& b) +{ + enum {IMM_R = (16 - imm) & 0xFF}; + enum {IMM_R2 = (32 - imm) & 0xFF}; + + if (imm == 0) return a; + if (imm == 32) return b; + if (imm > 32) return v_uint8x32(); + + __m256i swap = _mm256_permute2x128_si256(a.val, b.val, 0x03); + if (imm == 16) return v_uint8x32(swap); + if (imm < 16) return v_uint8x32(_mm256_alignr_epi8(a.val, swap, IMM_R)); + return v_uint8x32(_mm256_alignr_epi8(swap, b.val, IMM_R2)); // imm < 32 +} + +template +inline v_uint8x32 v_rotate_right(const v_uint8x32& a, const v_uint8x32& b) +{ + enum {IMM_L = (imm - 16) & 0xFF}; + + if (imm == 0) return a; + if (imm == 32) return b; + if (imm > 32) return v_uint8x32(); + + __m256i swap = _mm256_permute2x128_si256(a.val, b.val, 0x21); + if (imm == 16) return v_uint8x32(swap); + if (imm < 16) return v_uint8x32(_mm256_alignr_epi8(swap, a.val, imm)); + return v_uint8x32(_mm256_alignr_epi8(b.val, swap, IMM_L)); +} + +template +inline v_uint8x32 v_rotate_left(const v_uint8x32& a) +{ + enum {IMM_L = (imm - 16) & 0xFF}; + enum {IMM_R = (16 - imm) & 0xFF}; + + if (imm == 0) return a; + if (imm > 32) return v_uint8x32(); + + // ESAC control[3] ? [127:0] = 0 + __m256i swapz = _mm256_permute2x128_si256(a.val, a.val, _MM_SHUFFLE(0, 0, 2, 0)); + if (imm == 16) return v_uint8x32(swapz); + if (imm < 16) return v_uint8x32(_mm256_alignr_epi8(a.val, swapz, IMM_R)); + return v_uint8x32(_mm256_slli_si256(swapz, IMM_L)); +} + +template +inline v_uint8x32 v_rotate_right(const v_uint8x32& a) +{ + enum {IMM_L = (imm - 16) & 0xFF}; + + if (imm == 0) return a; + if (imm > 32) return v_uint8x32(); + + // ESAC control[3] ? [127:0] = 0 + __m256i swapz = _mm256_permute2x128_si256(a.val, a.val, _MM_SHUFFLE(2, 0, 0, 1)); + if (imm == 16) return v_uint8x32(swapz); + if (imm < 16) return v_uint8x32(_mm256_alignr_epi8(swapz, a.val, imm)); + return v_uint8x32(_mm256_srli_si256(swapz, IMM_L)); +} + +#define OPENCV_HAL_IMPL_AVX_ROTATE_CAST(intrin, _Tpvec, cast) \ + template \ + inline _Tpvec intrin(const _Tpvec& a, const _Tpvec& b) \ + { \ + enum {IMMxW = imm * sizeof(typename _Tpvec::lane_type)}; \ + v_uint8x32 ret = intrin(v_reinterpret_as_u8(a), \ + v_reinterpret_as_u8(b)); \ + return _Tpvec(cast(ret.val)); \ + } \ + template \ + inline _Tpvec intrin(const _Tpvec& a) \ + { \ + enum {IMMxW = imm * sizeof(typename _Tpvec::lane_type)}; \ + v_uint8x32 ret = intrin(v_reinterpret_as_u8(a)); \ + return _Tpvec(cast(ret.val)); \ + } + +#define OPENCV_HAL_IMPL_AVX_ROTATE(_Tpvec) \ + OPENCV_HAL_IMPL_AVX_ROTATE_CAST(v_rotate_left, _Tpvec, OPENCV_HAL_NOP) \ + OPENCV_HAL_IMPL_AVX_ROTATE_CAST(v_rotate_right, _Tpvec, OPENCV_HAL_NOP) + +OPENCV_HAL_IMPL_AVX_ROTATE(v_int8x32) +OPENCV_HAL_IMPL_AVX_ROTATE(v_uint16x16) +OPENCV_HAL_IMPL_AVX_ROTATE(v_int16x16) +OPENCV_HAL_IMPL_AVX_ROTATE(v_uint32x8) +OPENCV_HAL_IMPL_AVX_ROTATE(v_int32x8) +OPENCV_HAL_IMPL_AVX_ROTATE(v_uint64x4) +OPENCV_HAL_IMPL_AVX_ROTATE(v_int64x4) + +OPENCV_HAL_IMPL_AVX_ROTATE_CAST(v_rotate_left, v_float32x8, _mm256_castsi256_ps) +OPENCV_HAL_IMPL_AVX_ROTATE_CAST(v_rotate_right, v_float32x8, _mm256_castsi256_ps) +OPENCV_HAL_IMPL_AVX_ROTATE_CAST(v_rotate_left, v_float64x4, _mm256_castsi256_pd) +OPENCV_HAL_IMPL_AVX_ROTATE_CAST(v_rotate_right, v_float64x4, _mm256_castsi256_pd) + +/** Reverse **/ +inline v_uint8x32 v_reverse(const v_uint8x32 &a) +{ + static const __m256i perm = _mm256_setr_epi8( + 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, + 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __m256i vec = _mm256_shuffle_epi8(a.val, perm); + return v_uint8x32(_mm256_permute2x128_si256(vec, vec, 1)); +} + +inline v_int8x32 v_reverse(const v_int8x32 &a) +{ return v_reinterpret_as_s8(v_reverse(v_reinterpret_as_u8(a))); } + +inline v_uint16x16 v_reverse(const v_uint16x16 &a) +{ + static const __m256i perm = _mm256_setr_epi8( + 14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1, + 14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1); + __m256i vec = _mm256_shuffle_epi8(a.val, perm); + return v_uint16x16(_mm256_permute2x128_si256(vec, vec, 1)); +} + +inline v_int16x16 v_reverse(const v_int16x16 &a) +{ return v_reinterpret_as_s16(v_reverse(v_reinterpret_as_u16(a))); } + +inline v_uint32x8 v_reverse(const v_uint32x8 &a) +{ + static const __m256i perm = _mm256_setr_epi32(7, 6, 5, 4, 3, 2, 1, 0); + return v_uint32x8(_mm256_permutevar8x32_epi32(a.val, perm)); +} + +inline v_int32x8 v_reverse(const v_int32x8 &a) +{ return v_reinterpret_as_s32(v_reverse(v_reinterpret_as_u32(a))); } + +inline v_float32x8 v_reverse(const v_float32x8 &a) +{ return v_reinterpret_as_f32(v_reverse(v_reinterpret_as_u32(a))); } + +inline v_uint64x4 v_reverse(const v_uint64x4 &a) +{ + return v_uint64x4(_mm256_permute4x64_epi64(a.val, _MM_SHUFFLE(0, 1, 2, 3))); +} + +inline v_int64x4 v_reverse(const v_int64x4 &a) +{ return v_reinterpret_as_s64(v_reverse(v_reinterpret_as_u64(a))); } + +inline v_float64x4 v_reverse(const v_float64x4 &a) +{ return v_reinterpret_as_f64(v_reverse(v_reinterpret_as_u64(a))); } + +////////// Reduce and mask ///////// + +/** Reduce **/ +inline unsigned v_reduce_sum(const v_uint8x32& a) +{ + __m256i half = _mm256_sad_epu8(a.val, _mm256_setzero_si256()); + __m128i quarter = _mm_add_epi32(_v256_extract_low(half), _v256_extract_high(half)); + return (unsigned)_mm_cvtsi128_si32(_mm_add_epi32(quarter, _mm_unpackhi_epi64(quarter, quarter))); +} +inline int v_reduce_sum(const v_int8x32& a) +{ + __m256i half = _mm256_sad_epu8(_mm256_xor_si256(a.val, _mm256_set1_epi8((schar)-128)), _mm256_setzero_si256()); + __m128i quarter = _mm_add_epi32(_v256_extract_low(half), _v256_extract_high(half)); + return (unsigned)_mm_cvtsi128_si32(_mm_add_epi32(quarter, _mm_unpackhi_epi64(quarter, quarter))) - 4096; +} +#define OPENCV_HAL_IMPL_AVX_REDUCE_32(_Tpvec, sctype, func, intrin) \ + inline sctype v_reduce_##func(const _Tpvec& a) \ + { \ + __m128i val = intrin(_v256_extract_low(a.val), _v256_extract_high(a.val)); \ + val = intrin(val, _mm_srli_si128(val,8)); \ + val = intrin(val, _mm_srli_si128(val,4)); \ + val = intrin(val, _mm_srli_si128(val,2)); \ + val = intrin(val, _mm_srli_si128(val,1)); \ + return (sctype)_mm_cvtsi128_si32(val); \ + } + +OPENCV_HAL_IMPL_AVX_REDUCE_32(v_uint8x32, uchar, min, _mm_min_epu8) +OPENCV_HAL_IMPL_AVX_REDUCE_32(v_int8x32, schar, min, _mm_min_epi8) +OPENCV_HAL_IMPL_AVX_REDUCE_32(v_uint8x32, uchar, max, _mm_max_epu8) +OPENCV_HAL_IMPL_AVX_REDUCE_32(v_int8x32, schar, max, _mm_max_epi8) + +#define OPENCV_HAL_IMPL_AVX_REDUCE_16(_Tpvec, sctype, func, intrin) \ + inline sctype v_reduce_##func(const _Tpvec& a) \ + { \ + __m128i v0 = _v256_extract_low(a.val); \ + __m128i v1 = _v256_extract_high(a.val); \ + v0 = intrin(v0, v1); \ + v0 = intrin(v0, _mm_srli_si128(v0, 8)); \ + v0 = intrin(v0, _mm_srli_si128(v0, 4)); \ + v0 = intrin(v0, _mm_srli_si128(v0, 2)); \ + return (sctype) _mm_cvtsi128_si32(v0); \ + } + +OPENCV_HAL_IMPL_AVX_REDUCE_16(v_uint16x16, ushort, min, _mm_min_epu16) +OPENCV_HAL_IMPL_AVX_REDUCE_16(v_int16x16, short, min, _mm_min_epi16) +OPENCV_HAL_IMPL_AVX_REDUCE_16(v_uint16x16, ushort, max, _mm_max_epu16) +OPENCV_HAL_IMPL_AVX_REDUCE_16(v_int16x16, short, max, _mm_max_epi16) + +#define OPENCV_HAL_IMPL_AVX_REDUCE_8(_Tpvec, sctype, func, intrin) \ + inline sctype v_reduce_##func(const _Tpvec& a) \ + { \ + __m128i v0 = _v256_extract_low(a.val); \ + __m128i v1 = _v256_extract_high(a.val); \ + v0 = intrin(v0, v1); \ + v0 = intrin(v0, _mm_srli_si128(v0, 8)); \ + v0 = intrin(v0, _mm_srli_si128(v0, 4)); \ + return (sctype) _mm_cvtsi128_si32(v0); \ + } + +OPENCV_HAL_IMPL_AVX_REDUCE_8(v_uint32x8, unsigned, min, _mm_min_epu32) +OPENCV_HAL_IMPL_AVX_REDUCE_8(v_int32x8, int, min, _mm_min_epi32) +OPENCV_HAL_IMPL_AVX_REDUCE_8(v_uint32x8, unsigned, max, _mm_max_epu32) +OPENCV_HAL_IMPL_AVX_REDUCE_8(v_int32x8, int, max, _mm_max_epi32) + +#define OPENCV_HAL_IMPL_AVX_REDUCE_FLT(func, intrin) \ + inline float v_reduce_##func(const v_float32x8& a) \ + { \ + __m128 v0 = _v256_extract_low(a.val); \ + __m128 v1 = _v256_extract_high(a.val); \ + v0 = intrin(v0, v1); \ + v0 = intrin(v0, _mm_permute_ps(v0, _MM_SHUFFLE(0, 0, 3, 2))); \ + v0 = intrin(v0, _mm_permute_ps(v0, _MM_SHUFFLE(0, 0, 0, 1))); \ + return _mm_cvtss_f32(v0); \ + } + +OPENCV_HAL_IMPL_AVX_REDUCE_FLT(min, _mm_min_ps) +OPENCV_HAL_IMPL_AVX_REDUCE_FLT(max, _mm_max_ps) + +inline int v_reduce_sum(const v_int32x8& a) +{ + __m256i s0 = _mm256_hadd_epi32(a.val, a.val); + s0 = _mm256_hadd_epi32(s0, s0); + + __m128i s1 = _v256_extract_high(s0); + s1 = _mm_add_epi32(_v256_extract_low(s0), s1); + + return _mm_cvtsi128_si32(s1); +} + +inline unsigned v_reduce_sum(const v_uint32x8& a) +{ return v_reduce_sum(v_reinterpret_as_s32(a)); } + +inline int v_reduce_sum(const v_int16x16& a) +{ return v_reduce_sum(v_expand_low(a) + v_expand_high(a)); } +inline unsigned v_reduce_sum(const v_uint16x16& a) +{ return v_reduce_sum(v_expand_low(a) + v_expand_high(a)); } + +inline float v_reduce_sum(const v_float32x8& a) +{ + __m256 s0 = _mm256_hadd_ps(a.val, a.val); + s0 = _mm256_hadd_ps(s0, s0); + + __m128 s1 = _v256_extract_high(s0); + s1 = _mm_add_ps(_v256_extract_low(s0), s1); + + return _mm_cvtss_f32(s1); +} + +inline uint64 v_reduce_sum(const v_uint64x4& a) +{ + uint64 CV_DECL_ALIGNED(32) idx[2]; + _mm_store_si128((__m128i*)idx, _mm_add_epi64(_v256_extract_low(a.val), _v256_extract_high(a.val))); + return idx[0] + idx[1]; +} +inline int64 v_reduce_sum(const v_int64x4& a) +{ + int64 CV_DECL_ALIGNED(32) idx[2]; + _mm_store_si128((__m128i*)idx, _mm_add_epi64(_v256_extract_low(a.val), _v256_extract_high(a.val))); + return idx[0] + idx[1]; +} +inline double v_reduce_sum(const v_float64x4& a) +{ + __m256d s0 = _mm256_hadd_pd(a.val, a.val); + return _mm_cvtsd_f64(_mm_add_pd(_v256_extract_low(s0), _v256_extract_high(s0))); +} + +inline v_float32x8 v_reduce_sum4(const v_float32x8& a, const v_float32x8& b, + const v_float32x8& c, const v_float32x8& d) +{ + __m256 ab = _mm256_hadd_ps(a.val, b.val); + __m256 cd = _mm256_hadd_ps(c.val, d.val); + return v_float32x8(_mm256_hadd_ps(ab, cd)); +} + +inline unsigned v_reduce_sad(const v_uint8x32& a, const v_uint8x32& b) +{ + __m256i half = _mm256_sad_epu8(a.val, b.val); + __m128i quarter = _mm_add_epi32(_v256_extract_low(half), _v256_extract_high(half)); + return (unsigned)_mm_cvtsi128_si32(_mm_add_epi32(quarter, _mm_unpackhi_epi64(quarter, quarter))); +} +inline unsigned v_reduce_sad(const v_int8x32& a, const v_int8x32& b) +{ + __m256i half = _mm256_set1_epi8(0x7f); + half = _mm256_sad_epu8(_mm256_add_epi8(a.val, half), _mm256_add_epi8(b.val, half)); + __m128i quarter = _mm_add_epi32(_v256_extract_low(half), _v256_extract_high(half)); + return (unsigned)_mm_cvtsi128_si32(_mm_add_epi32(quarter, _mm_unpackhi_epi64(quarter, quarter))); +} +inline unsigned v_reduce_sad(const v_uint16x16& a, const v_uint16x16& b) +{ + v_uint32x8 l, h; + v_expand(v_add_wrap(a - b, b - a), l, h); + return v_reduce_sum(l + h); +} +inline unsigned v_reduce_sad(const v_int16x16& a, const v_int16x16& b) +{ + v_uint32x8 l, h; + v_expand(v_reinterpret_as_u16(v_sub_wrap(v_max(a, b), v_min(a, b))), l, h); + return v_reduce_sum(l + h); +} +inline unsigned v_reduce_sad(const v_uint32x8& a, const v_uint32x8& b) +{ + return v_reduce_sum(v_max(a, b) - v_min(a, b)); +} +inline unsigned v_reduce_sad(const v_int32x8& a, const v_int32x8& b) +{ + v_int32x8 m = a < b; + return v_reduce_sum(v_reinterpret_as_u32(((a - b) ^ m) - m)); +} +inline float v_reduce_sad(const v_float32x8& a, const v_float32x8& b) +{ + return v_reduce_sum((a - b) & v_float32x8(_mm256_castsi256_ps(_mm256_set1_epi32(0x7fffffff)))); +} + +/** Popcount **/ +inline v_uint8x32 v_popcount(const v_uint8x32& a) +{ + __m256i _popcnt_table = _mm256_setr_epi8(0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, + 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4); + __m256i _popcnt_mask = _mm256_set1_epi8(0x0F); + return v_uint8x32(_mm256_add_epi8(_mm256_shuffle_epi8(_popcnt_table, _mm256_and_si256( a.val , _popcnt_mask)), + _mm256_shuffle_epi8(_popcnt_table, _mm256_and_si256(_mm256_srli_epi16(a.val, 4), _popcnt_mask)))); +} +inline v_uint16x16 v_popcount(const v_uint16x16& a) +{ + v_uint8x32 p = v_popcount(v_reinterpret_as_u8(a)); + p += v_rotate_right<1>(p); + return v_reinterpret_as_u16(p) & v256_setall_u16(0x00ff); +} +inline v_uint32x8 v_popcount(const v_uint32x8& a) +{ + v_uint8x32 p = v_popcount(v_reinterpret_as_u8(a)); + p += v_rotate_right<1>(p); + p += v_rotate_right<2>(p); + return v_reinterpret_as_u32(p) & v256_setall_u32(0x000000ff); +} +inline v_uint64x4 v_popcount(const v_uint64x4& a) +{ + return v_uint64x4(_mm256_sad_epu8(v_popcount(v_reinterpret_as_u8(a)).val, _mm256_setzero_si256())); +} +inline v_uint8x32 v_popcount(const v_int8x32& a) +{ return v_popcount(v_reinterpret_as_u8(a)); } +inline v_uint16x16 v_popcount(const v_int16x16& a) +{ return v_popcount(v_reinterpret_as_u16(a)); } +inline v_uint32x8 v_popcount(const v_int32x8& a) +{ return v_popcount(v_reinterpret_as_u32(a)); } +inline v_uint64x4 v_popcount(const v_int64x4& a) +{ return v_popcount(v_reinterpret_as_u64(a)); } + +/** Mask **/ +inline int v_signmask(const v_int8x32& a) +{ return _mm256_movemask_epi8(a.val); } +inline int v_signmask(const v_uint8x32& a) +{ return v_signmask(v_reinterpret_as_s8(a)); } + +inline int v_signmask(const v_int16x16& a) +{ return v_signmask(v_pack(a, a)) & 0xFFFF; } +inline int v_signmask(const v_uint16x16& a) +{ return v_signmask(v_reinterpret_as_s16(a)); } + +inline int v_signmask(const v_float32x8& a) +{ return _mm256_movemask_ps(a.val); } +inline int v_signmask(const v_float64x4& a) +{ return _mm256_movemask_pd(a.val); } + +inline int v_signmask(const v_int32x8& a) +{ return v_signmask(v_reinterpret_as_f32(a)); } +inline int v_signmask(const v_uint32x8& a) +{ return v_signmask(v_reinterpret_as_f32(a)); } + +inline int v_signmask(const v_int64x4& a) +{ return v_signmask(v_reinterpret_as_f64(a)); } +inline int v_signmask(const v_uint64x4& a) +{ return v_signmask(v_reinterpret_as_f64(a)); } + +inline int v_scan_forward(const v_int8x32& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))); } +inline int v_scan_forward(const v_uint8x32& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))); } +inline int v_scan_forward(const v_int16x16& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))) / 2; } +inline int v_scan_forward(const v_uint16x16& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))) / 2; } +inline int v_scan_forward(const v_int32x8& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))) / 4; } +inline int v_scan_forward(const v_uint32x8& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))) / 4; } +inline int v_scan_forward(const v_float32x8& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))) / 4; } +inline int v_scan_forward(const v_int64x4& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))) / 8; } +inline int v_scan_forward(const v_uint64x4& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))) / 8; } +inline int v_scan_forward(const v_float64x4& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))) / 8; } + +/** Checks **/ +#define OPENCV_HAL_IMPL_AVX_CHECK(_Tpvec, allmask) \ + inline bool v_check_all(const _Tpvec& a) { return v_signmask(a) == allmask; } \ + inline bool v_check_any(const _Tpvec& a) { return v_signmask(a) != 0; } +OPENCV_HAL_IMPL_AVX_CHECK(v_uint8x32, -1) +OPENCV_HAL_IMPL_AVX_CHECK(v_int8x32, -1) +OPENCV_HAL_IMPL_AVX_CHECK(v_uint32x8, 255) +OPENCV_HAL_IMPL_AVX_CHECK(v_int32x8, 255) +OPENCV_HAL_IMPL_AVX_CHECK(v_uint64x4, 15) +OPENCV_HAL_IMPL_AVX_CHECK(v_int64x4, 15) +OPENCV_HAL_IMPL_AVX_CHECK(v_float32x8, 255) +OPENCV_HAL_IMPL_AVX_CHECK(v_float64x4, 15) + +#define OPENCV_HAL_IMPL_AVX_CHECK_SHORT(_Tpvec) \ + inline bool v_check_all(const _Tpvec& a) { return (v_signmask(v_reinterpret_as_s8(a)) & 0xaaaaaaaa) == 0xaaaaaaaa; } \ + inline bool v_check_any(const _Tpvec& a) { return (v_signmask(v_reinterpret_as_s8(a)) & 0xaaaaaaaa) != 0; } +OPENCV_HAL_IMPL_AVX_CHECK_SHORT(v_uint16x16) +OPENCV_HAL_IMPL_AVX_CHECK_SHORT(v_int16x16) + +////////// Other math ///////// + +/** Some frequent operations **/ +#define OPENCV_HAL_IMPL_AVX_MULADD(_Tpvec, suffix) \ + inline _Tpvec v_fma(const _Tpvec& a, const _Tpvec& b, const _Tpvec& c) \ + { return _Tpvec(_mm256_fmadd_##suffix(a.val, b.val, c.val)); } \ + inline _Tpvec v_muladd(const _Tpvec& a, const _Tpvec& b, const _Tpvec& c) \ + { return _Tpvec(_mm256_fmadd_##suffix(a.val, b.val, c.val)); } \ + inline _Tpvec v_sqrt(const _Tpvec& x) \ + { return _Tpvec(_mm256_sqrt_##suffix(x.val)); } \ + inline _Tpvec v_sqr_magnitude(const _Tpvec& a, const _Tpvec& b) \ + { return v_fma(a, a, b * b); } \ + inline _Tpvec v_magnitude(const _Tpvec& a, const _Tpvec& b) \ + { return v_sqrt(v_fma(a, a, b*b)); } + +OPENCV_HAL_IMPL_AVX_MULADD(v_float32x8, ps) +OPENCV_HAL_IMPL_AVX_MULADD(v_float64x4, pd) + +inline v_int32x8 v_fma(const v_int32x8& a, const v_int32x8& b, const v_int32x8& c) +{ + return a * b + c; +} + +inline v_int32x8 v_muladd(const v_int32x8& a, const v_int32x8& b, const v_int32x8& c) +{ + return v_fma(a, b, c); +} + +inline v_float32x8 v_invsqrt(const v_float32x8& x) +{ + v_float32x8 half = x * v256_setall_f32(0.5); + v_float32x8 t = v_float32x8(_mm256_rsqrt_ps(x.val)); + // todo: _mm256_fnmsub_ps + t *= v256_setall_f32(1.5) - ((t * t) * half); + return t; +} + +inline v_float64x4 v_invsqrt(const v_float64x4& x) +{ + return v256_setall_f64(1.) / v_sqrt(x); +} + +/** Absolute values **/ +#define OPENCV_HAL_IMPL_AVX_ABS(_Tpvec, suffix) \ + inline v_u##_Tpvec v_abs(const v_##_Tpvec& x) \ + { return v_u##_Tpvec(_mm256_abs_##suffix(x.val)); } + +OPENCV_HAL_IMPL_AVX_ABS(int8x32, epi8) +OPENCV_HAL_IMPL_AVX_ABS(int16x16, epi16) +OPENCV_HAL_IMPL_AVX_ABS(int32x8, epi32) + +inline v_float32x8 v_abs(const v_float32x8& x) +{ return x & v_float32x8(_mm256_castsi256_ps(_mm256_set1_epi32(0x7fffffff))); } +inline v_float64x4 v_abs(const v_float64x4& x) +{ return x & v_float64x4(_mm256_castsi256_pd(_mm256_srli_epi64(_mm256_set1_epi64x(-1), 1))); } + +/** Absolute difference **/ +inline v_uint8x32 v_absdiff(const v_uint8x32& a, const v_uint8x32& b) +{ return v_add_wrap(a - b, b - a); } +inline v_uint16x16 v_absdiff(const v_uint16x16& a, const v_uint16x16& b) +{ return v_add_wrap(a - b, b - a); } +inline v_uint32x8 v_absdiff(const v_uint32x8& a, const v_uint32x8& b) +{ return v_max(a, b) - v_min(a, b); } + +inline v_uint8x32 v_absdiff(const v_int8x32& a, const v_int8x32& b) +{ + v_int8x32 d = v_sub_wrap(a, b); + v_int8x32 m = a < b; + return v_reinterpret_as_u8(v_sub_wrap(d ^ m, m)); +} + +inline v_uint16x16 v_absdiff(const v_int16x16& a, const v_int16x16& b) +{ return v_reinterpret_as_u16(v_sub_wrap(v_max(a, b), v_min(a, b))); } + +inline v_uint32x8 v_absdiff(const v_int32x8& a, const v_int32x8& b) +{ + v_int32x8 d = a - b; + v_int32x8 m = a < b; + return v_reinterpret_as_u32((d ^ m) - m); +} + +inline v_float32x8 v_absdiff(const v_float32x8& a, const v_float32x8& b) +{ return v_abs(a - b); } + +inline v_float64x4 v_absdiff(const v_float64x4& a, const v_float64x4& b) +{ return v_abs(a - b); } + +/** Saturating absolute difference **/ +inline v_int8x32 v_absdiffs(const v_int8x32& a, const v_int8x32& b) +{ + v_int8x32 d = a - b; + v_int8x32 m = a < b; + return (d ^ m) - m; +} +inline v_int16x16 v_absdiffs(const v_int16x16& a, const v_int16x16& b) +{ return v_max(a, b) - v_min(a, b); } + +////////// Conversions ///////// + +/** Rounding **/ +inline v_int32x8 v_round(const v_float32x8& a) +{ return v_int32x8(_mm256_cvtps_epi32(a.val)); } + +inline v_int32x8 v_round(const v_float64x4& a) +{ return v_int32x8(_mm256_castsi128_si256(_mm256_cvtpd_epi32(a.val))); } + +inline v_int32x8 v_round(const v_float64x4& a, const v_float64x4& b) +{ + __m128i ai = _mm256_cvtpd_epi32(a.val), bi = _mm256_cvtpd_epi32(b.val); + return v_int32x8(_v256_combine(ai, bi)); +} + +inline v_int32x8 v_trunc(const v_float32x8& a) +{ return v_int32x8(_mm256_cvttps_epi32(a.val)); } + +inline v_int32x8 v_trunc(const v_float64x4& a) +{ return v_int32x8(_mm256_castsi128_si256(_mm256_cvttpd_epi32(a.val))); } + +inline v_int32x8 v_floor(const v_float32x8& a) +{ return v_int32x8(_mm256_cvttps_epi32(_mm256_floor_ps(a.val))); } + +inline v_int32x8 v_floor(const v_float64x4& a) +{ return v_trunc(v_float64x4(_mm256_floor_pd(a.val))); } + +inline v_int32x8 v_ceil(const v_float32x8& a) +{ return v_int32x8(_mm256_cvttps_epi32(_mm256_ceil_ps(a.val))); } + +inline v_int32x8 v_ceil(const v_float64x4& a) +{ return v_trunc(v_float64x4(_mm256_ceil_pd(a.val))); } + +/** To float **/ +inline v_float32x8 v_cvt_f32(const v_int32x8& a) +{ return v_float32x8(_mm256_cvtepi32_ps(a.val)); } + +inline v_float32x8 v_cvt_f32(const v_float64x4& a) +{ return v_float32x8(_mm256_castps128_ps256(_mm256_cvtpd_ps(a.val))); } + +inline v_float32x8 v_cvt_f32(const v_float64x4& a, const v_float64x4& b) +{ + __m128 af = _mm256_cvtpd_ps(a.val), bf = _mm256_cvtpd_ps(b.val); + return v_float32x8(_v256_combine(af, bf)); +} + +inline v_float64x4 v_cvt_f64(const v_int32x8& a) +{ return v_float64x4(_mm256_cvtepi32_pd(_v256_extract_low(a.val))); } + +inline v_float64x4 v_cvt_f64_high(const v_int32x8& a) +{ return v_float64x4(_mm256_cvtepi32_pd(_v256_extract_high(a.val))); } + +inline v_float64x4 v_cvt_f64(const v_float32x8& a) +{ return v_float64x4(_mm256_cvtps_pd(_v256_extract_low(a.val))); } + +inline v_float64x4 v_cvt_f64_high(const v_float32x8& a) +{ return v_float64x4(_mm256_cvtps_pd(_v256_extract_high(a.val))); } + +// from (Mysticial and wim) https://stackoverflow.com/q/41144668 +inline v_float64x4 v_cvt_f64(const v_int64x4& v) +{ + // constants encoded as floating-point + __m256i magic_i_lo = _mm256_set1_epi64x(0x4330000000000000); // 2^52 + __m256i magic_i_hi32 = _mm256_set1_epi64x(0x4530000080000000); // 2^84 + 2^63 + __m256i magic_i_all = _mm256_set1_epi64x(0x4530000080100000); // 2^84 + 2^63 + 2^52 + __m256d magic_d_all = _mm256_castsi256_pd(magic_i_all); + + // Blend the 32 lowest significant bits of v with magic_int_lo + __m256i v_lo = _mm256_blend_epi32(magic_i_lo, v.val, 0x55); + // Extract the 32 most significant bits of v + __m256i v_hi = _mm256_srli_epi64(v.val, 32); + // Flip the msb of v_hi and blend with 0x45300000 + v_hi = _mm256_xor_si256(v_hi, magic_i_hi32); + // Compute in double precision + __m256d v_hi_dbl = _mm256_sub_pd(_mm256_castsi256_pd(v_hi), magic_d_all); + // (v_hi - magic_d_all) + v_lo Do not assume associativity of floating point addition + __m256d result = _mm256_add_pd(v_hi_dbl, _mm256_castsi256_pd(v_lo)); + return v_float64x4(result); +} + +////////////// Lookup table access //////////////////// + +inline v_int8x32 v256_lut(const schar* tab, const int* idx) +{ + return v_int8x32(_mm256_setr_epi8(tab[idx[ 0]], tab[idx[ 1]], tab[idx[ 2]], tab[idx[ 3]], tab[idx[ 4]], tab[idx[ 5]], tab[idx[ 6]], tab[idx[ 7]], + tab[idx[ 8]], tab[idx[ 9]], tab[idx[10]], tab[idx[11]], tab[idx[12]], tab[idx[13]], tab[idx[14]], tab[idx[15]], + tab[idx[16]], tab[idx[17]], tab[idx[18]], tab[idx[19]], tab[idx[20]], tab[idx[21]], tab[idx[22]], tab[idx[23]], + tab[idx[24]], tab[idx[25]], tab[idx[26]], tab[idx[27]], tab[idx[28]], tab[idx[29]], tab[idx[30]], tab[idx[31]])); +} +inline v_int8x32 v256_lut_pairs(const schar* tab, const int* idx) +{ + return v_int8x32(_mm256_setr_epi16(*(const short*)(tab + idx[ 0]), *(const short*)(tab + idx[ 1]), *(const short*)(tab + idx[ 2]), *(const short*)(tab + idx[ 3]), + *(const short*)(tab + idx[ 4]), *(const short*)(tab + idx[ 5]), *(const short*)(tab + idx[ 6]), *(const short*)(tab + idx[ 7]), + *(const short*)(tab + idx[ 8]), *(const short*)(tab + idx[ 9]), *(const short*)(tab + idx[10]), *(const short*)(tab + idx[11]), + *(const short*)(tab + idx[12]), *(const short*)(tab + idx[13]), *(const short*)(tab + idx[14]), *(const short*)(tab + idx[15]))); +} +inline v_int8x32 v256_lut_quads(const schar* tab, const int* idx) +{ + return v_int8x32(_mm256_i32gather_epi32((const int*)tab, _mm256_loadu_si256((const __m256i*)idx), 1)); +} +inline v_uint8x32 v256_lut(const uchar* tab, const int* idx) { return v_reinterpret_as_u8(v256_lut((const schar *)tab, idx)); } +inline v_uint8x32 v256_lut_pairs(const uchar* tab, const int* idx) { return v_reinterpret_as_u8(v256_lut_pairs((const schar *)tab, idx)); } +inline v_uint8x32 v256_lut_quads(const uchar* tab, const int* idx) { return v_reinterpret_as_u8(v256_lut_quads((const schar *)tab, idx)); } + +inline v_int16x16 v256_lut(const short* tab, const int* idx) +{ + return v_int16x16(_mm256_setr_epi16(tab[idx[0]], tab[idx[1]], tab[idx[ 2]], tab[idx[ 3]], tab[idx[ 4]], tab[idx[ 5]], tab[idx[ 6]], tab[idx[ 7]], + tab[idx[8]], tab[idx[9]], tab[idx[10]], tab[idx[11]], tab[idx[12]], tab[idx[13]], tab[idx[14]], tab[idx[15]])); +} +inline v_int16x16 v256_lut_pairs(const short* tab, const int* idx) +{ + return v_int16x16(_mm256_i32gather_epi32((const int*)tab, _mm256_loadu_si256((const __m256i*)idx), 2)); +} +inline v_int16x16 v256_lut_quads(const short* tab, const int* idx) +{ +#if defined(__GNUC__) + return v_int16x16(_mm256_i32gather_epi64((const long long int*)tab, _mm_loadu_si128((const __m128i*)idx), 2));//Looks like intrinsic has wrong definition +#else + return v_int16x16(_mm256_i32gather_epi64((const int64*)tab, _mm_loadu_si128((const __m128i*)idx), 2)); +#endif +} +inline v_uint16x16 v256_lut(const ushort* tab, const int* idx) { return v_reinterpret_as_u16(v256_lut((const short *)tab, idx)); } +inline v_uint16x16 v256_lut_pairs(const ushort* tab, const int* idx) { return v_reinterpret_as_u16(v256_lut_pairs((const short *)tab, idx)); } +inline v_uint16x16 v256_lut_quads(const ushort* tab, const int* idx) { return v_reinterpret_as_u16(v256_lut_quads((const short *)tab, idx)); } + +inline v_int32x8 v256_lut(const int* tab, const int* idx) +{ + return v_int32x8(_mm256_i32gather_epi32(tab, _mm256_loadu_si256((const __m256i*)idx), 4)); +} +inline v_int32x8 v256_lut_pairs(const int* tab, const int* idx) +{ +#if defined(__GNUC__) + return v_int32x8(_mm256_i32gather_epi64((const long long int*)tab, _mm_loadu_si128((const __m128i*)idx), 4)); +#else + return v_int32x8(_mm256_i32gather_epi64((const int64*)tab, _mm_loadu_si128((const __m128i*)idx), 4)); +#endif +} +inline v_int32x8 v256_lut_quads(const int* tab, const int* idx) +{ + return v_int32x8(_v256_combine(_mm_loadu_si128((const __m128i*)(tab + idx[0])), _mm_loadu_si128((const __m128i*)(tab + idx[1])))); +} +inline v_uint32x8 v256_lut(const unsigned* tab, const int* idx) { return v_reinterpret_as_u32(v256_lut((const int *)tab, idx)); } +inline v_uint32x8 v256_lut_pairs(const unsigned* tab, const int* idx) { return v_reinterpret_as_u32(v256_lut_pairs((const int *)tab, idx)); } +inline v_uint32x8 v256_lut_quads(const unsigned* tab, const int* idx) { return v_reinterpret_as_u32(v256_lut_quads((const int *)tab, idx)); } + +inline v_int64x4 v256_lut(const int64* tab, const int* idx) +{ +#if defined(__GNUC__) + return v_int64x4(_mm256_i32gather_epi64((const long long int*)tab, _mm_loadu_si128((const __m128i*)idx), 8)); +#else + return v_int64x4(_mm256_i32gather_epi64(tab, _mm_loadu_si128((const __m128i*)idx), 8)); +#endif +} +inline v_int64x4 v256_lut_pairs(const int64* tab, const int* idx) +{ + return v_int64x4(_v256_combine(_mm_loadu_si128((const __m128i*)(tab + idx[0])), _mm_loadu_si128((const __m128i*)(tab + idx[1])))); +} +inline v_uint64x4 v256_lut(const uint64* tab, const int* idx) { return v_reinterpret_as_u64(v256_lut((const int64 *)tab, idx)); } +inline v_uint64x4 v256_lut_pairs(const uint64* tab, const int* idx) { return v_reinterpret_as_u64(v256_lut_pairs((const int64 *)tab, idx)); } + +inline v_float32x8 v256_lut(const float* tab, const int* idx) +{ + return v_float32x8(_mm256_i32gather_ps(tab, _mm256_loadu_si256((const __m256i*)idx), 4)); +} +inline v_float32x8 v256_lut_pairs(const float* tab, const int* idx) { return v_reinterpret_as_f32(v256_lut_pairs((const int *)tab, idx)); } +inline v_float32x8 v256_lut_quads(const float* tab, const int* idx) { return v_reinterpret_as_f32(v256_lut_quads((const int *)tab, idx)); } + +inline v_float64x4 v256_lut(const double* tab, const int* idx) +{ + return v_float64x4(_mm256_i32gather_pd(tab, _mm_loadu_si128((const __m128i*)idx), 8)); +} +inline v_float64x4 v256_lut_pairs(const double* tab, const int* idx) { return v_float64x4(_v256_combine(_mm_loadu_pd(tab + idx[0]), _mm_loadu_pd(tab + idx[1]))); } + +inline v_int32x8 v_lut(const int* tab, const v_int32x8& idxvec) +{ + return v_int32x8(_mm256_i32gather_epi32(tab, idxvec.val, 4)); +} + +inline v_uint32x8 v_lut(const unsigned* tab, const v_int32x8& idxvec) +{ + return v_reinterpret_as_u32(v_lut((const int *)tab, idxvec)); +} + +inline v_float32x8 v_lut(const float* tab, const v_int32x8& idxvec) +{ + return v_float32x8(_mm256_i32gather_ps(tab, idxvec.val, 4)); +} + +inline v_float64x4 v_lut(const double* tab, const v_int32x8& idxvec) +{ + return v_float64x4(_mm256_i32gather_pd(tab, _mm256_castsi256_si128(idxvec.val), 8)); +} + +inline void v_lut_deinterleave(const float* tab, const v_int32x8& idxvec, v_float32x8& x, v_float32x8& y) +{ + int CV_DECL_ALIGNED(32) idx[8]; + v_store_aligned(idx, idxvec); + __m128 z = _mm_setzero_ps(); + __m128 xy01, xy45, xy23, xy67; + xy01 = _mm_loadl_pi(z, (const __m64*)(tab + idx[0])); + xy01 = _mm_loadh_pi(xy01, (const __m64*)(tab + idx[1])); + xy45 = _mm_loadl_pi(z, (const __m64*)(tab + idx[4])); + xy45 = _mm_loadh_pi(xy45, (const __m64*)(tab + idx[5])); + __m256 xy0145 = _v256_combine(xy01, xy45); + xy23 = _mm_loadl_pi(z, (const __m64*)(tab + idx[2])); + xy23 = _mm_loadh_pi(xy23, (const __m64*)(tab + idx[3])); + xy67 = _mm_loadl_pi(z, (const __m64*)(tab + idx[6])); + xy67 = _mm_loadh_pi(xy67, (const __m64*)(tab + idx[7])); + __m256 xy2367 = _v256_combine(xy23, xy67); + + __m256 xxyy0145 = _mm256_unpacklo_ps(xy0145, xy2367); + __m256 xxyy2367 = _mm256_unpackhi_ps(xy0145, xy2367); + + x = v_float32x8(_mm256_unpacklo_ps(xxyy0145, xxyy2367)); + y = v_float32x8(_mm256_unpackhi_ps(xxyy0145, xxyy2367)); +} + +inline void v_lut_deinterleave(const double* tab, const v_int32x8& idxvec, v_float64x4& x, v_float64x4& y) +{ + int CV_DECL_ALIGNED(32) idx[4]; + v_store_low(idx, idxvec); + __m128d xy0 = _mm_loadu_pd(tab + idx[0]); + __m128d xy2 = _mm_loadu_pd(tab + idx[2]); + __m128d xy1 = _mm_loadu_pd(tab + idx[1]); + __m128d xy3 = _mm_loadu_pd(tab + idx[3]); + __m256d xy02 = _v256_combine(xy0, xy2); + __m256d xy13 = _v256_combine(xy1, xy3); + + x = v_float64x4(_mm256_unpacklo_pd(xy02, xy13)); + y = v_float64x4(_mm256_unpackhi_pd(xy02, xy13)); +} + +inline v_int8x32 v_interleave_pairs(const v_int8x32& vec) +{ + return v_int8x32(_mm256_shuffle_epi8(vec.val, _mm256_set_epi64x(0x0f0d0e0c0b090a08, 0x0705060403010200, 0x0f0d0e0c0b090a08, 0x0705060403010200))); +} +inline v_uint8x32 v_interleave_pairs(const v_uint8x32& vec) { return v_reinterpret_as_u8(v_interleave_pairs(v_reinterpret_as_s8(vec))); } +inline v_int8x32 v_interleave_quads(const v_int8x32& vec) +{ + return v_int8x32(_mm256_shuffle_epi8(vec.val, _mm256_set_epi64x(0x0f0b0e0a0d090c08, 0x0703060205010400, 0x0f0b0e0a0d090c08, 0x0703060205010400))); +} +inline v_uint8x32 v_interleave_quads(const v_uint8x32& vec) { return v_reinterpret_as_u8(v_interleave_quads(v_reinterpret_as_s8(vec))); } + +inline v_int16x16 v_interleave_pairs(const v_int16x16& vec) +{ + return v_int16x16(_mm256_shuffle_epi8(vec.val, _mm256_set_epi64x(0x0f0e0b0a0d0c0908, 0x0706030205040100, 0x0f0e0b0a0d0c0908, 0x0706030205040100))); +} +inline v_uint16x16 v_interleave_pairs(const v_uint16x16& vec) { return v_reinterpret_as_u16(v_interleave_pairs(v_reinterpret_as_s16(vec))); } +inline v_int16x16 v_interleave_quads(const v_int16x16& vec) +{ + return v_int16x16(_mm256_shuffle_epi8(vec.val, _mm256_set_epi64x(0x0f0e07060d0c0504, 0x0b0a030209080100, 0x0f0e07060d0c0504, 0x0b0a030209080100))); +} +inline v_uint16x16 v_interleave_quads(const v_uint16x16& vec) { return v_reinterpret_as_u16(v_interleave_quads(v_reinterpret_as_s16(vec))); } + +inline v_int32x8 v_interleave_pairs(const v_int32x8& vec) +{ + return v_int32x8(_mm256_shuffle_epi32(vec.val, _MM_SHUFFLE(3, 1, 2, 0))); +} +inline v_uint32x8 v_interleave_pairs(const v_uint32x8& vec) { return v_reinterpret_as_u32(v_interleave_pairs(v_reinterpret_as_s32(vec))); } +inline v_float32x8 v_interleave_pairs(const v_float32x8& vec) { return v_reinterpret_as_f32(v_interleave_pairs(v_reinterpret_as_s32(vec))); } + +inline v_int8x32 v_pack_triplets(const v_int8x32& vec) +{ + return v_int8x32(_mm256_permutevar8x32_epi32(_mm256_shuffle_epi8(vec.val, _mm256_broadcastsi128_si256(_mm_set_epi64x(0xffffff0f0e0d0c0a, 0x0908060504020100))), + _mm256_set_epi64x(0x0000000700000007, 0x0000000600000005, 0x0000000400000002, 0x0000000100000000))); +} +inline v_uint8x32 v_pack_triplets(const v_uint8x32& vec) { return v_reinterpret_as_u8(v_pack_triplets(v_reinterpret_as_s8(vec))); } + +inline v_int16x16 v_pack_triplets(const v_int16x16& vec) +{ + return v_int16x16(_mm256_permutevar8x32_epi32(_mm256_shuffle_epi8(vec.val, _mm256_broadcastsi128_si256(_mm_set_epi64x(0xffff0f0e0d0c0b0a, 0x0908050403020100))), + _mm256_set_epi64x(0x0000000700000007, 0x0000000600000005, 0x0000000400000002, 0x0000000100000000))); +} +inline v_uint16x16 v_pack_triplets(const v_uint16x16& vec) { return v_reinterpret_as_u16(v_pack_triplets(v_reinterpret_as_s16(vec))); } + +inline v_int32x8 v_pack_triplets(const v_int32x8& vec) +{ + return v_int32x8(_mm256_permutevar8x32_epi32(vec.val, _mm256_set_epi64x(0x0000000700000007, 0x0000000600000005, 0x0000000400000002, 0x0000000100000000))); +} +inline v_uint32x8 v_pack_triplets(const v_uint32x8& vec) { return v_reinterpret_as_u32(v_pack_triplets(v_reinterpret_as_s32(vec))); } +inline v_float32x8 v_pack_triplets(const v_float32x8& vec) +{ + return v_float32x8(_mm256_permutevar8x32_ps(vec.val, _mm256_set_epi64x(0x0000000700000007, 0x0000000600000005, 0x0000000400000002, 0x0000000100000000))); +} + +////////// Matrix operations ///////// + +//////// Dot Product //////// + +// 16 >> 32 +inline v_int32x8 v_dotprod(const v_int16x16& a, const v_int16x16& b) +{ return v_int32x8(_mm256_madd_epi16(a.val, b.val)); } +inline v_int32x8 v_dotprod(const v_int16x16& a, const v_int16x16& b, const v_int32x8& c) +{ return v_dotprod(a, b) + c; } + +// 32 >> 64 +inline v_int64x4 v_dotprod(const v_int32x8& a, const v_int32x8& b) +{ + __m256i even = _mm256_mul_epi32(a.val, b.val); + __m256i odd = _mm256_mul_epi32(_mm256_srli_epi64(a.val, 32), _mm256_srli_epi64(b.val, 32)); + return v_int64x4(_mm256_add_epi64(even, odd)); +} +inline v_int64x4 v_dotprod(const v_int32x8& a, const v_int32x8& b, const v_int64x4& c) +{ return v_dotprod(a, b) + c; } + +// 8 >> 32 +inline v_uint32x8 v_dotprod_expand(const v_uint8x32& a, const v_uint8x32& b) +{ + __m256i even_m = _mm256_set1_epi32(0xFF00FF00); + __m256i even_a = _mm256_blendv_epi8(a.val, _mm256_setzero_si256(), even_m); + __m256i odd_a = _mm256_srli_epi16(a.val, 8); + + __m256i even_b = _mm256_blendv_epi8(b.val, _mm256_setzero_si256(), even_m); + __m256i odd_b = _mm256_srli_epi16(b.val, 8); + + __m256i prod0 = _mm256_madd_epi16(even_a, even_b); + __m256i prod1 = _mm256_madd_epi16(odd_a, odd_b); + return v_uint32x8(_mm256_add_epi32(prod0, prod1)); +} +inline v_uint32x8 v_dotprod_expand(const v_uint8x32& a, const v_uint8x32& b, const v_uint32x8& c) +{ return v_dotprod_expand(a, b) + c; } + +inline v_int32x8 v_dotprod_expand(const v_int8x32& a, const v_int8x32& b) +{ + __m256i even_a = _mm256_srai_epi16(_mm256_bslli_epi128(a.val, 1), 8); + __m256i odd_a = _mm256_srai_epi16(a.val, 8); + + __m256i even_b = _mm256_srai_epi16(_mm256_bslli_epi128(b.val, 1), 8); + __m256i odd_b = _mm256_srai_epi16(b.val, 8); + + __m256i prod0 = _mm256_madd_epi16(even_a, even_b); + __m256i prod1 = _mm256_madd_epi16(odd_a, odd_b); + return v_int32x8(_mm256_add_epi32(prod0, prod1)); +} +inline v_int32x8 v_dotprod_expand(const v_int8x32& a, const v_int8x32& b, const v_int32x8& c) +{ return v_dotprod_expand(a, b) + c; } + +// 16 >> 64 +inline v_uint64x4 v_dotprod_expand(const v_uint16x16& a, const v_uint16x16& b) +{ + __m256i mullo = _mm256_mullo_epi16(a.val, b.val); + __m256i mulhi = _mm256_mulhi_epu16(a.val, b.val); + __m256i mul0 = _mm256_unpacklo_epi16(mullo, mulhi); + __m256i mul1 = _mm256_unpackhi_epi16(mullo, mulhi); + + __m256i p02 = _mm256_blend_epi32(mul0, _mm256_setzero_si256(), 0xAA); + __m256i p13 = _mm256_srli_epi64(mul0, 32); + __m256i p46 = _mm256_blend_epi32(mul1, _mm256_setzero_si256(), 0xAA); + __m256i p57 = _mm256_srli_epi64(mul1, 32); + + __m256i p15_ = _mm256_add_epi64(p02, p13); + __m256i p9d_ = _mm256_add_epi64(p46, p57); + + return v_uint64x4(_mm256_add_epi64( + _mm256_unpacklo_epi64(p15_, p9d_), + _mm256_unpackhi_epi64(p15_, p9d_) + )); +} +inline v_uint64x4 v_dotprod_expand(const v_uint16x16& a, const v_uint16x16& b, const v_uint64x4& c) +{ return v_dotprod_expand(a, b) + c; } + +inline v_int64x4 v_dotprod_expand(const v_int16x16& a, const v_int16x16& b) +{ + __m256i prod = _mm256_madd_epi16(a.val, b.val); + __m256i sign = _mm256_srai_epi32(prod, 31); + + __m256i lo = _mm256_unpacklo_epi32(prod, sign); + __m256i hi = _mm256_unpackhi_epi32(prod, sign); + + return v_int64x4(_mm256_add_epi64( + _mm256_unpacklo_epi64(lo, hi), + _mm256_unpackhi_epi64(lo, hi) + )); +} +inline v_int64x4 v_dotprod_expand(const v_int16x16& a, const v_int16x16& b, const v_int64x4& c) +{ return v_dotprod_expand(a, b) + c; } + +// 32 >> 64f +inline v_float64x4 v_dotprod_expand(const v_int32x8& a, const v_int32x8& b) +{ return v_cvt_f64(v_dotprod(a, b)); } +inline v_float64x4 v_dotprod_expand(const v_int32x8& a, const v_int32x8& b, const v_float64x4& c) +{ return v_dotprod_expand(a, b) + c; } + +//////// Fast Dot Product //////// + +// 16 >> 32 +inline v_int32x8 v_dotprod_fast(const v_int16x16& a, const v_int16x16& b) +{ return v_dotprod(a, b); } +inline v_int32x8 v_dotprod_fast(const v_int16x16& a, const v_int16x16& b, const v_int32x8& c) +{ return v_dotprod(a, b, c); } + +// 32 >> 64 +inline v_int64x4 v_dotprod_fast(const v_int32x8& a, const v_int32x8& b) +{ return v_dotprod(a, b); } +inline v_int64x4 v_dotprod_fast(const v_int32x8& a, const v_int32x8& b, const v_int64x4& c) +{ return v_dotprod(a, b, c); } + +// 8 >> 32 +inline v_uint32x8 v_dotprod_expand_fast(const v_uint8x32& a, const v_uint8x32& b) +{ return v_dotprod_expand(a, b); } +inline v_uint32x8 v_dotprod_expand_fast(const v_uint8x32& a, const v_uint8x32& b, const v_uint32x8& c) +{ return v_dotprod_expand(a, b, c); } + +inline v_int32x8 v_dotprod_expand_fast(const v_int8x32& a, const v_int8x32& b) +{ return v_dotprod_expand(a, b); } +inline v_int32x8 v_dotprod_expand_fast(const v_int8x32& a, const v_int8x32& b, const v_int32x8& c) +{ return v_dotprod_expand(a, b, c); } + +// 16 >> 64 +inline v_uint64x4 v_dotprod_expand_fast(const v_uint16x16& a, const v_uint16x16& b) +{ + __m256i mullo = _mm256_mullo_epi16(a.val, b.val); + __m256i mulhi = _mm256_mulhi_epu16(a.val, b.val); + __m256i mul0 = _mm256_unpacklo_epi16(mullo, mulhi); + __m256i mul1 = _mm256_unpackhi_epi16(mullo, mulhi); + + __m256i p02 = _mm256_blend_epi32(mul0, _mm256_setzero_si256(), 0xAA); + __m256i p13 = _mm256_srli_epi64(mul0, 32); + __m256i p46 = _mm256_blend_epi32(mul1, _mm256_setzero_si256(), 0xAA); + __m256i p57 = _mm256_srli_epi64(mul1, 32); + + __m256i p15_ = _mm256_add_epi64(p02, p13); + __m256i p9d_ = _mm256_add_epi64(p46, p57); + + return v_uint64x4(_mm256_add_epi64(p15_, p9d_)); +} +inline v_uint64x4 v_dotprod_expand_fast(const v_uint16x16& a, const v_uint16x16& b, const v_uint64x4& c) +{ return v_dotprod_expand_fast(a, b) + c; } + +inline v_int64x4 v_dotprod_expand_fast(const v_int16x16& a, const v_int16x16& b) +{ + __m256i prod = _mm256_madd_epi16(a.val, b.val); + __m256i sign = _mm256_srai_epi32(prod, 31); + __m256i lo = _mm256_unpacklo_epi32(prod, sign); + __m256i hi = _mm256_unpackhi_epi32(prod, sign); + return v_int64x4(_mm256_add_epi64(lo, hi)); +} +inline v_int64x4 v_dotprod_expand_fast(const v_int16x16& a, const v_int16x16& b, const v_int64x4& c) +{ return v_dotprod_expand_fast(a, b) + c; } + +// 32 >> 64f +inline v_float64x4 v_dotprod_expand_fast(const v_int32x8& a, const v_int32x8& b) +{ return v_dotprod_expand(a, b); } +inline v_float64x4 v_dotprod_expand_fast(const v_int32x8& a, const v_int32x8& b, const v_float64x4& c) +{ return v_dotprod_expand(a, b, c); } + +#define OPENCV_HAL_AVX_SPLAT2_PS(a, im) \ + v_float32x8(_mm256_permute_ps(a.val, _MM_SHUFFLE(im, im, im, im))) + +inline v_float32x8 v_matmul(const v_float32x8& v, const v_float32x8& m0, + const v_float32x8& m1, const v_float32x8& m2, + const v_float32x8& m3) +{ + v_float32x8 v04 = OPENCV_HAL_AVX_SPLAT2_PS(v, 0); + v_float32x8 v15 = OPENCV_HAL_AVX_SPLAT2_PS(v, 1); + v_float32x8 v26 = OPENCV_HAL_AVX_SPLAT2_PS(v, 2); + v_float32x8 v37 = OPENCV_HAL_AVX_SPLAT2_PS(v, 3); + return v_fma(v04, m0, v_fma(v15, m1, v_fma(v26, m2, v37 * m3))); +} + +inline v_float32x8 v_matmuladd(const v_float32x8& v, const v_float32x8& m0, + const v_float32x8& m1, const v_float32x8& m2, + const v_float32x8& a) +{ + v_float32x8 v04 = OPENCV_HAL_AVX_SPLAT2_PS(v, 0); + v_float32x8 v15 = OPENCV_HAL_AVX_SPLAT2_PS(v, 1); + v_float32x8 v26 = OPENCV_HAL_AVX_SPLAT2_PS(v, 2); + return v_fma(v04, m0, v_fma(v15, m1, v_fma(v26, m2, a))); +} + +#define OPENCV_HAL_IMPL_AVX_TRANSPOSE4x4(_Tpvec, suffix, cast_from, cast_to) \ + inline void v_transpose4x4(const _Tpvec& a0, const _Tpvec& a1, \ + const _Tpvec& a2, const _Tpvec& a3, \ + _Tpvec& b0, _Tpvec& b1, _Tpvec& b2, _Tpvec& b3) \ + { \ + __m256i t0 = cast_from(_mm256_unpacklo_##suffix(a0.val, a1.val)); \ + __m256i t1 = cast_from(_mm256_unpacklo_##suffix(a2.val, a3.val)); \ + __m256i t2 = cast_from(_mm256_unpackhi_##suffix(a0.val, a1.val)); \ + __m256i t3 = cast_from(_mm256_unpackhi_##suffix(a2.val, a3.val)); \ + b0.val = cast_to(_mm256_unpacklo_epi64(t0, t1)); \ + b1.val = cast_to(_mm256_unpackhi_epi64(t0, t1)); \ + b2.val = cast_to(_mm256_unpacklo_epi64(t2, t3)); \ + b3.val = cast_to(_mm256_unpackhi_epi64(t2, t3)); \ + } + +OPENCV_HAL_IMPL_AVX_TRANSPOSE4x4(v_uint32x8, epi32, OPENCV_HAL_NOP, OPENCV_HAL_NOP) +OPENCV_HAL_IMPL_AVX_TRANSPOSE4x4(v_int32x8, epi32, OPENCV_HAL_NOP, OPENCV_HAL_NOP) +OPENCV_HAL_IMPL_AVX_TRANSPOSE4x4(v_float32x8, ps, _mm256_castps_si256, _mm256_castsi256_ps) + +//////////////// Value reordering /////////////// + +/* Expand */ +#define OPENCV_HAL_IMPL_AVX_EXPAND(_Tpvec, _Tpwvec, _Tp, intrin) \ + inline void v_expand(const _Tpvec& a, _Tpwvec& b0, _Tpwvec& b1) \ + { \ + b0.val = intrin(_v256_extract_low(a.val)); \ + b1.val = intrin(_v256_extract_high(a.val)); \ + } \ + inline _Tpwvec v_expand_low(const _Tpvec& a) \ + { return _Tpwvec(intrin(_v256_extract_low(a.val))); } \ + inline _Tpwvec v_expand_high(const _Tpvec& a) \ + { return _Tpwvec(intrin(_v256_extract_high(a.val))); } \ + inline _Tpwvec v256_load_expand(const _Tp* ptr) \ + { \ + __m128i a = _mm_loadu_si128((const __m128i*)ptr); \ + return _Tpwvec(intrin(a)); \ + } + +OPENCV_HAL_IMPL_AVX_EXPAND(v_uint8x32, v_uint16x16, uchar, _mm256_cvtepu8_epi16) +OPENCV_HAL_IMPL_AVX_EXPAND(v_int8x32, v_int16x16, schar, _mm256_cvtepi8_epi16) +OPENCV_HAL_IMPL_AVX_EXPAND(v_uint16x16, v_uint32x8, ushort, _mm256_cvtepu16_epi32) +OPENCV_HAL_IMPL_AVX_EXPAND(v_int16x16, v_int32x8, short, _mm256_cvtepi16_epi32) +OPENCV_HAL_IMPL_AVX_EXPAND(v_uint32x8, v_uint64x4, unsigned, _mm256_cvtepu32_epi64) +OPENCV_HAL_IMPL_AVX_EXPAND(v_int32x8, v_int64x4, int, _mm256_cvtepi32_epi64) + +#define OPENCV_HAL_IMPL_AVX_EXPAND_Q(_Tpvec, _Tp, intrin) \ + inline _Tpvec v256_load_expand_q(const _Tp* ptr) \ + { \ + __m128i a = _mm_loadl_epi64((const __m128i*)ptr); \ + return _Tpvec(intrin(a)); \ + } + +OPENCV_HAL_IMPL_AVX_EXPAND_Q(v_uint32x8, uchar, _mm256_cvtepu8_epi32) +OPENCV_HAL_IMPL_AVX_EXPAND_Q(v_int32x8, schar, _mm256_cvtepi8_epi32) + +/* pack */ +// 16 +inline v_int8x32 v_pack(const v_int16x16& a, const v_int16x16& b) +{ return v_int8x32(_v256_shuffle_odd_64(_mm256_packs_epi16(a.val, b.val))); } + +inline v_uint8x32 v_pack(const v_uint16x16& a, const v_uint16x16& b) +{ + __m256i t = _mm256_set1_epi16(255); + __m256i a1 = _mm256_min_epu16(a.val, t); + __m256i b1 = _mm256_min_epu16(b.val, t); + return v_uint8x32(_v256_shuffle_odd_64(_mm256_packus_epi16(a1, b1))); +} + +inline v_uint8x32 v_pack_u(const v_int16x16& a, const v_int16x16& b) +{ + return v_uint8x32(_v256_shuffle_odd_64(_mm256_packus_epi16(a.val, b.val))); +} + +inline void v_pack_store(schar* ptr, const v_int16x16& a) +{ v_store_low(ptr, v_pack(a, a)); } + +inline void v_pack_store(uchar* ptr, const v_uint16x16& a) +{ + const __m256i m = _mm256_set1_epi16(255); + __m256i am = _mm256_min_epu16(a.val, m); + am = _v256_shuffle_odd_64(_mm256_packus_epi16(am, am)); + v_store_low(ptr, v_uint8x32(am)); +} + +inline void v_pack_u_store(uchar* ptr, const v_int16x16& a) +{ v_store_low(ptr, v_pack_u(a, a)); } + +template inline +v_uint8x32 v_rshr_pack(const v_uint16x16& a, const v_uint16x16& b) +{ + // we assume that n > 0, and so the shifted 16-bit values can be treated as signed numbers. + v_uint16x16 delta = v256_setall_u16((short)(1 << (n-1))); + return v_pack_u(v_reinterpret_as_s16((a + delta) >> n), + v_reinterpret_as_s16((b + delta) >> n)); +} + +template inline +void v_rshr_pack_store(uchar* ptr, const v_uint16x16& a) +{ + v_uint16x16 delta = v256_setall_u16((short)(1 << (n-1))); + v_pack_u_store(ptr, v_reinterpret_as_s16((a + delta) >> n)); +} + +template inline +v_uint8x32 v_rshr_pack_u(const v_int16x16& a, const v_int16x16& b) +{ + v_int16x16 delta = v256_setall_s16((short)(1 << (n-1))); + return v_pack_u((a + delta) >> n, (b + delta) >> n); +} + +template inline +void v_rshr_pack_u_store(uchar* ptr, const v_int16x16& a) +{ + v_int16x16 delta = v256_setall_s16((short)(1 << (n-1))); + v_pack_u_store(ptr, (a + delta) >> n); +} + +template inline +v_int8x32 v_rshr_pack(const v_int16x16& a, const v_int16x16& b) +{ + v_int16x16 delta = v256_setall_s16((short)(1 << (n-1))); + return v_pack((a + delta) >> n, (b + delta) >> n); +} + +template inline +void v_rshr_pack_store(schar* ptr, const v_int16x16& a) +{ + v_int16x16 delta = v256_setall_s16((short)(1 << (n-1))); + v_pack_store(ptr, (a + delta) >> n); +} + +// 32 +inline v_int16x16 v_pack(const v_int32x8& a, const v_int32x8& b) +{ return v_int16x16(_v256_shuffle_odd_64(_mm256_packs_epi32(a.val, b.val))); } + +inline v_uint16x16 v_pack(const v_uint32x8& a, const v_uint32x8& b) +{ return v_uint16x16(_v256_shuffle_odd_64(_v256_packs_epu32(a.val, b.val))); } + +inline v_uint16x16 v_pack_u(const v_int32x8& a, const v_int32x8& b) +{ return v_uint16x16(_v256_shuffle_odd_64(_mm256_packus_epi32(a.val, b.val))); } + +inline void v_pack_store(short* ptr, const v_int32x8& a) +{ v_store_low(ptr, v_pack(a, a)); } + +inline void v_pack_store(ushort* ptr, const v_uint32x8& a) +{ + const __m256i m = _mm256_set1_epi32(65535); + __m256i am = _mm256_min_epu32(a.val, m); + am = _v256_shuffle_odd_64(_mm256_packus_epi32(am, am)); + v_store_low(ptr, v_uint16x16(am)); +} + +inline void v_pack_u_store(ushort* ptr, const v_int32x8& a) +{ v_store_low(ptr, v_pack_u(a, a)); } + + +template inline +v_uint16x16 v_rshr_pack(const v_uint32x8& a, const v_uint32x8& b) +{ + // we assume that n > 0, and so the shifted 32-bit values can be treated as signed numbers. + v_uint32x8 delta = v256_setall_u32(1 << (n-1)); + return v_pack_u(v_reinterpret_as_s32((a + delta) >> n), + v_reinterpret_as_s32((b + delta) >> n)); +} + +template inline +void v_rshr_pack_store(ushort* ptr, const v_uint32x8& a) +{ + v_uint32x8 delta = v256_setall_u32(1 << (n-1)); + v_pack_u_store(ptr, v_reinterpret_as_s32((a + delta) >> n)); +} + +template inline +v_uint16x16 v_rshr_pack_u(const v_int32x8& a, const v_int32x8& b) +{ + v_int32x8 delta = v256_setall_s32(1 << (n-1)); + return v_pack_u((a + delta) >> n, (b + delta) >> n); +} + +template inline +void v_rshr_pack_u_store(ushort* ptr, const v_int32x8& a) +{ + v_int32x8 delta = v256_setall_s32(1 << (n-1)); + v_pack_u_store(ptr, (a + delta) >> n); +} + +template inline +v_int16x16 v_rshr_pack(const v_int32x8& a, const v_int32x8& b) +{ + v_int32x8 delta = v256_setall_s32(1 << (n-1)); + return v_pack((a + delta) >> n, (b + delta) >> n); +} + +template inline +void v_rshr_pack_store(short* ptr, const v_int32x8& a) +{ + v_int32x8 delta = v256_setall_s32(1 << (n-1)); + v_pack_store(ptr, (a + delta) >> n); +} + +// 64 +// Non-saturating pack +inline v_uint32x8 v_pack(const v_uint64x4& a, const v_uint64x4& b) +{ + __m256i a0 = _mm256_shuffle_epi32(a.val, _MM_SHUFFLE(0, 0, 2, 0)); + __m256i b0 = _mm256_shuffle_epi32(b.val, _MM_SHUFFLE(0, 0, 2, 0)); + __m256i ab = _mm256_unpacklo_epi64(a0, b0); // a0, a1, b0, b1, a2, a3, b2, b3 + return v_uint32x8(_v256_shuffle_odd_64(ab)); +} + +inline v_int32x8 v_pack(const v_int64x4& a, const v_int64x4& b) +{ return v_reinterpret_as_s32(v_pack(v_reinterpret_as_u64(a), v_reinterpret_as_u64(b))); } + +inline void v_pack_store(unsigned* ptr, const v_uint64x4& a) +{ + __m256i a0 = _mm256_shuffle_epi32(a.val, _MM_SHUFFLE(0, 0, 2, 0)); + v_store_low(ptr, v_uint32x8(_v256_shuffle_odd_64(a0))); +} + +inline void v_pack_store(int* ptr, const v_int64x4& b) +{ v_pack_store((unsigned*)ptr, v_reinterpret_as_u64(b)); } + +template inline +v_uint32x8 v_rshr_pack(const v_uint64x4& a, const v_uint64x4& b) +{ + v_uint64x4 delta = v256_setall_u64((uint64)1 << (n-1)); + return v_pack((a + delta) >> n, (b + delta) >> n); +} + +template inline +void v_rshr_pack_store(unsigned* ptr, const v_uint64x4& a) +{ + v_uint64x4 delta = v256_setall_u64((uint64)1 << (n-1)); + v_pack_store(ptr, (a + delta) >> n); +} + +template inline +v_int32x8 v_rshr_pack(const v_int64x4& a, const v_int64x4& b) +{ + v_int64x4 delta = v256_setall_s64((int64)1 << (n-1)); + return v_pack((a + delta) >> n, (b + delta) >> n); +} + +template inline +void v_rshr_pack_store(int* ptr, const v_int64x4& a) +{ + v_int64x4 delta = v256_setall_s64((int64)1 << (n-1)); + v_pack_store(ptr, (a + delta) >> n); +} + +// pack boolean +inline v_uint8x32 v_pack_b(const v_uint16x16& a, const v_uint16x16& b) +{ + __m256i ab = _mm256_packs_epi16(a.val, b.val); + return v_uint8x32(_v256_shuffle_odd_64(ab)); +} + +inline v_uint8x32 v_pack_b(const v_uint32x8& a, const v_uint32x8& b, + const v_uint32x8& c, const v_uint32x8& d) +{ + __m256i ab = _mm256_packs_epi32(a.val, b.val); + __m256i cd = _mm256_packs_epi32(c.val, d.val); + + __m256i abcd = _v256_shuffle_odd_64(_mm256_packs_epi16(ab, cd)); + return v_uint8x32(_mm256_shuffle_epi32(abcd, _MM_SHUFFLE(3, 1, 2, 0))); +} + +inline v_uint8x32 v_pack_b(const v_uint64x4& a, const v_uint64x4& b, const v_uint64x4& c, + const v_uint64x4& d, const v_uint64x4& e, const v_uint64x4& f, + const v_uint64x4& g, const v_uint64x4& h) +{ + __m256i ab = _mm256_packs_epi32(a.val, b.val); + __m256i cd = _mm256_packs_epi32(c.val, d.val); + __m256i ef = _mm256_packs_epi32(e.val, f.val); + __m256i gh = _mm256_packs_epi32(g.val, h.val); + + __m256i abcd = _mm256_packs_epi32(ab, cd); + __m256i efgh = _mm256_packs_epi32(ef, gh); + __m256i pkall = _v256_shuffle_odd_64(_mm256_packs_epi16(abcd, efgh)); + + __m256i rev = _mm256_alignr_epi8(pkall, pkall, 8); + return v_uint8x32(_mm256_unpacklo_epi16(pkall, rev)); +} + +/* Recombine */ +// its up there with load and store operations + +/* Extract */ +#define OPENCV_HAL_IMPL_AVX_EXTRACT(_Tpvec) \ + template \ + inline _Tpvec v_extract(const _Tpvec& a, const _Tpvec& b) \ + { return v_rotate_right(a, b); } + +OPENCV_HAL_IMPL_AVX_EXTRACT(v_uint8x32) +OPENCV_HAL_IMPL_AVX_EXTRACT(v_int8x32) +OPENCV_HAL_IMPL_AVX_EXTRACT(v_uint16x16) +OPENCV_HAL_IMPL_AVX_EXTRACT(v_int16x16) +OPENCV_HAL_IMPL_AVX_EXTRACT(v_uint32x8) +OPENCV_HAL_IMPL_AVX_EXTRACT(v_int32x8) +OPENCV_HAL_IMPL_AVX_EXTRACT(v_uint64x4) +OPENCV_HAL_IMPL_AVX_EXTRACT(v_int64x4) +OPENCV_HAL_IMPL_AVX_EXTRACT(v_float32x8) +OPENCV_HAL_IMPL_AVX_EXTRACT(v_float64x4) + +template +inline uchar v_extract_n(v_uint8x32 a) +{ + return (uchar)_v256_extract_epi8(a.val); +} + +template +inline schar v_extract_n(v_int8x32 a) +{ + return (schar)v_extract_n(v_reinterpret_as_u8(a)); +} + +template +inline ushort v_extract_n(v_uint16x16 a) +{ + return (ushort)_v256_extract_epi16(a.val); +} + +template +inline short v_extract_n(v_int16x16 a) +{ + return (short)v_extract_n(v_reinterpret_as_u16(a)); +} + +template +inline uint v_extract_n(v_uint32x8 a) +{ + return (uint)_v256_extract_epi32(a.val); +} + +template +inline int v_extract_n(v_int32x8 a) +{ + return (int)v_extract_n(v_reinterpret_as_u32(a)); +} + +template +inline uint64 v_extract_n(v_uint64x4 a) +{ + return (uint64)_v256_extract_epi64(a.val); +} + +template +inline int64 v_extract_n(v_int64x4 v) +{ + return (int64)v_extract_n(v_reinterpret_as_u64(v)); +} + +template +inline float v_extract_n(v_float32x8 v) +{ + union { uint iv; float fv; } d; + d.iv = v_extract_n(v_reinterpret_as_u32(v)); + return d.fv; +} + +template +inline double v_extract_n(v_float64x4 v) +{ + union { uint64 iv; double dv; } d; + d.iv = v_extract_n(v_reinterpret_as_u64(v)); + return d.dv; +} + +template +inline v_uint32x8 v_broadcast_element(v_uint32x8 a) +{ + static const __m256i perm = _mm256_set1_epi32((char)i); + return v_uint32x8(_mm256_permutevar8x32_epi32(a.val, perm)); +} + +template +inline v_int32x8 v_broadcast_element(const v_int32x8 &a) +{ return v_reinterpret_as_s32(v_broadcast_element(v_reinterpret_as_u32(a))); } + +template +inline v_float32x8 v_broadcast_element(const v_float32x8 &a) +{ return v_reinterpret_as_f32(v_broadcast_element(v_reinterpret_as_u32(a))); } + + +///////////////////// load deinterleave ///////////////////////////// + +inline void v_load_deinterleave( const uchar* ptr, v_uint8x32& a, v_uint8x32& b ) +{ + __m256i ab0 = _mm256_loadu_si256((const __m256i*)ptr); + __m256i ab1 = _mm256_loadu_si256((const __m256i*)(ptr + 32)); + + const __m256i sh = _mm256_setr_epi8(0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15, + 0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15); + __m256i p0 = _mm256_shuffle_epi8(ab0, sh); + __m256i p1 = _mm256_shuffle_epi8(ab1, sh); + __m256i pl = _mm256_permute2x128_si256(p0, p1, 0 + 2*16); + __m256i ph = _mm256_permute2x128_si256(p0, p1, 1 + 3*16); + __m256i a0 = _mm256_unpacklo_epi64(pl, ph); + __m256i b0 = _mm256_unpackhi_epi64(pl, ph); + a = v_uint8x32(a0); + b = v_uint8x32(b0); +} + +inline void v_load_deinterleave( const ushort* ptr, v_uint16x16& a, v_uint16x16& b ) +{ + __m256i ab0 = _mm256_loadu_si256((const __m256i*)ptr); + __m256i ab1 = _mm256_loadu_si256((const __m256i*)(ptr + 16)); + + const __m256i sh = _mm256_setr_epi8(0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15, + 0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15); + __m256i p0 = _mm256_shuffle_epi8(ab0, sh); + __m256i p1 = _mm256_shuffle_epi8(ab1, sh); + __m256i pl = _mm256_permute2x128_si256(p0, p1, 0 + 2*16); + __m256i ph = _mm256_permute2x128_si256(p0, p1, 1 + 3*16); + __m256i a0 = _mm256_unpacklo_epi64(pl, ph); + __m256i b0 = _mm256_unpackhi_epi64(pl, ph); + a = v_uint16x16(a0); + b = v_uint16x16(b0); +} + +inline void v_load_deinterleave( const unsigned* ptr, v_uint32x8& a, v_uint32x8& b ) +{ + __m256i ab0 = _mm256_loadu_si256((const __m256i*)ptr); + __m256i ab1 = _mm256_loadu_si256((const __m256i*)(ptr + 8)); + + const int sh = 0+2*4+1*16+3*64; + __m256i p0 = _mm256_shuffle_epi32(ab0, sh); + __m256i p1 = _mm256_shuffle_epi32(ab1, sh); + __m256i pl = _mm256_permute2x128_si256(p0, p1, 0 + 2*16); + __m256i ph = _mm256_permute2x128_si256(p0, p1, 1 + 3*16); + __m256i a0 = _mm256_unpacklo_epi64(pl, ph); + __m256i b0 = _mm256_unpackhi_epi64(pl, ph); + a = v_uint32x8(a0); + b = v_uint32x8(b0); +} + +inline void v_load_deinterleave( const uint64* ptr, v_uint64x4& a, v_uint64x4& b ) +{ + __m256i ab0 = _mm256_loadu_si256((const __m256i*)ptr); + __m256i ab1 = _mm256_loadu_si256((const __m256i*)(ptr + 4)); + + __m256i pl = _mm256_permute2x128_si256(ab0, ab1, 0 + 2*16); + __m256i ph = _mm256_permute2x128_si256(ab0, ab1, 1 + 3*16); + __m256i a0 = _mm256_unpacklo_epi64(pl, ph); + __m256i b0 = _mm256_unpackhi_epi64(pl, ph); + a = v_uint64x4(a0); + b = v_uint64x4(b0); +} + +inline void v_load_deinterleave( const uchar* ptr, v_uint8x32& a, v_uint8x32& b, v_uint8x32& c ) +{ + __m256i bgr0 = _mm256_loadu_si256((const __m256i*)ptr); + __m256i bgr1 = _mm256_loadu_si256((const __m256i*)(ptr + 32)); + __m256i bgr2 = _mm256_loadu_si256((const __m256i*)(ptr + 64)); + + __m256i s02_low = _mm256_permute2x128_si256(bgr0, bgr2, 0 + 2*16); + __m256i s02_high = _mm256_permute2x128_si256(bgr0, bgr2, 1 + 3*16); + + const __m256i m0 = _mm256_setr_epi8(0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, + 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0); + const __m256i m1 = _mm256_setr_epi8(0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, + -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1); + + __m256i b0 = _mm256_blendv_epi8(_mm256_blendv_epi8(s02_low, s02_high, m0), bgr1, m1); + __m256i g0 = _mm256_blendv_epi8(_mm256_blendv_epi8(s02_high, s02_low, m1), bgr1, m0); + __m256i r0 = _mm256_blendv_epi8(_mm256_blendv_epi8(bgr1, s02_low, m0), s02_high, m1); + + const __m256i + sh_b = _mm256_setr_epi8(0, 3, 6, 9, 12, 15, 2, 5, 8, 11, 14, 1, 4, 7, 10, 13, + 0, 3, 6, 9, 12, 15, 2, 5, 8, 11, 14, 1, 4, 7, 10, 13), + sh_g = _mm256_setr_epi8(1, 4, 7, 10, 13, 0, 3, 6, 9, 12, 15, 2, 5, 8, 11, 14, + 1, 4, 7, 10, 13, 0, 3, 6, 9, 12, 15, 2, 5, 8, 11, 14), + sh_r = _mm256_setr_epi8(2, 5, 8, 11, 14, 1, 4, 7, 10, 13, 0, 3, 6, 9, 12, 15, + 2, 5, 8, 11, 14, 1, 4, 7, 10, 13, 0, 3, 6, 9, 12, 15); + b0 = _mm256_shuffle_epi8(b0, sh_b); + g0 = _mm256_shuffle_epi8(g0, sh_g); + r0 = _mm256_shuffle_epi8(r0, sh_r); + + a = v_uint8x32(b0); + b = v_uint8x32(g0); + c = v_uint8x32(r0); +} + +inline void v_load_deinterleave( const ushort* ptr, v_uint16x16& a, v_uint16x16& b, v_uint16x16& c ) +{ + __m256i bgr0 = _mm256_loadu_si256((const __m256i*)ptr); + __m256i bgr1 = _mm256_loadu_si256((const __m256i*)(ptr + 16)); + __m256i bgr2 = _mm256_loadu_si256((const __m256i*)(ptr + 32)); + + __m256i s02_low = _mm256_permute2x128_si256(bgr0, bgr2, 0 + 2*16); + __m256i s02_high = _mm256_permute2x128_si256(bgr0, bgr2, 1 + 3*16); + + const __m256i m0 = _mm256_setr_epi8(0, 0, -1, -1, 0, 0, 0, 0, -1, -1, 0, 0, 0, 0, -1, -1, + 0, 0, 0, 0, -1, -1, 0, 0, 0, 0, -1, -1, 0, 0, 0, 0); + const __m256i m1 = _mm256_setr_epi8(0, 0, 0, 0, -1, -1, 0, 0, 0, 0, -1, -1, 0, 0, 0, 0, + -1, -1, 0, 0, 0, 0, -1, -1, 0, 0, 0, 0, -1, -1, 0, 0); + __m256i b0 = _mm256_blendv_epi8(_mm256_blendv_epi8(s02_low, s02_high, m0), bgr1, m1); + __m256i g0 = _mm256_blendv_epi8(_mm256_blendv_epi8(bgr1, s02_low, m0), s02_high, m1); + __m256i r0 = _mm256_blendv_epi8(_mm256_blendv_epi8(s02_high, s02_low, m1), bgr1, m0); + const __m256i sh_b = _mm256_setr_epi8(0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15, 4, 5, 10, 11, + 0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15, 4, 5, 10, 11); + const __m256i sh_g = _mm256_setr_epi8(2, 3, 8, 9, 14, 15, 4, 5, 10, 11, 0, 1, 6, 7, 12, 13, + 2, 3, 8, 9, 14, 15, 4, 5, 10, 11, 0, 1, 6, 7, 12, 13); + const __m256i sh_r = _mm256_setr_epi8(4, 5, 10, 11, 0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15, + 4, 5, 10, 11, 0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15); + b0 = _mm256_shuffle_epi8(b0, sh_b); + g0 = _mm256_shuffle_epi8(g0, sh_g); + r0 = _mm256_shuffle_epi8(r0, sh_r); + + a = v_uint16x16(b0); + b = v_uint16x16(g0); + c = v_uint16x16(r0); +} + +inline void v_load_deinterleave( const unsigned* ptr, v_uint32x8& a, v_uint32x8& b, v_uint32x8& c ) +{ + __m256i bgr0 = _mm256_loadu_si256((const __m256i*)ptr); + __m256i bgr1 = _mm256_loadu_si256((const __m256i*)(ptr + 8)); + __m256i bgr2 = _mm256_loadu_si256((const __m256i*)(ptr + 16)); + + __m256i s02_low = _mm256_permute2x128_si256(bgr0, bgr2, 0 + 2*16); + __m256i s02_high = _mm256_permute2x128_si256(bgr0, bgr2, 1 + 3*16); + + __m256i b0 = _mm256_blend_epi32(_mm256_blend_epi32(s02_low, s02_high, 0x24), bgr1, 0x92); + __m256i g0 = _mm256_blend_epi32(_mm256_blend_epi32(s02_high, s02_low, 0x92), bgr1, 0x24); + __m256i r0 = _mm256_blend_epi32(_mm256_blend_epi32(bgr1, s02_low, 0x24), s02_high, 0x92); + + b0 = _mm256_shuffle_epi32(b0, 0x6c); + g0 = _mm256_shuffle_epi32(g0, 0xb1); + r0 = _mm256_shuffle_epi32(r0, 0xc6); + + a = v_uint32x8(b0); + b = v_uint32x8(g0); + c = v_uint32x8(r0); +} + +inline void v_load_deinterleave( const uint64* ptr, v_uint64x4& a, v_uint64x4& b, v_uint64x4& c ) +{ + __m256i bgr0 = _mm256_loadu_si256((const __m256i*)ptr); + __m256i bgr1 = _mm256_loadu_si256((const __m256i*)(ptr + 4)); + __m256i bgr2 = _mm256_loadu_si256((const __m256i*)(ptr + 8)); + + __m256i s01 = _mm256_blend_epi32(bgr0, bgr1, 0xf0); + __m256i s12 = _mm256_blend_epi32(bgr1, bgr2, 0xf0); + __m256i s20r = _mm256_permute4x64_epi64(_mm256_blend_epi32(bgr2, bgr0, 0xf0), 0x1b); + __m256i b0 = _mm256_unpacklo_epi64(s01, s20r); + __m256i g0 = _mm256_alignr_epi8(s12, s01, 8); + __m256i r0 = _mm256_unpackhi_epi64(s20r, s12); + + a = v_uint64x4(b0); + b = v_uint64x4(g0); + c = v_uint64x4(r0); +} + +inline void v_load_deinterleave( const uchar* ptr, v_uint8x32& a, v_uint8x32& b, v_uint8x32& c, v_uint8x32& d ) +{ + __m256i bgr0 = _mm256_loadu_si256((const __m256i*)ptr); + __m256i bgr1 = _mm256_loadu_si256((const __m256i*)(ptr + 32)); + __m256i bgr2 = _mm256_loadu_si256((const __m256i*)(ptr + 64)); + __m256i bgr3 = _mm256_loadu_si256((const __m256i*)(ptr + 96)); + const __m256i sh = _mm256_setr_epi8(0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15, + 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15); + + __m256i p0 = _mm256_shuffle_epi8(bgr0, sh); + __m256i p1 = _mm256_shuffle_epi8(bgr1, sh); + __m256i p2 = _mm256_shuffle_epi8(bgr2, sh); + __m256i p3 = _mm256_shuffle_epi8(bgr3, sh); + + __m256i p01l = _mm256_unpacklo_epi32(p0, p1); + __m256i p01h = _mm256_unpackhi_epi32(p0, p1); + __m256i p23l = _mm256_unpacklo_epi32(p2, p3); + __m256i p23h = _mm256_unpackhi_epi32(p2, p3); + + __m256i pll = _mm256_permute2x128_si256(p01l, p23l, 0 + 2*16); + __m256i plh = _mm256_permute2x128_si256(p01l, p23l, 1 + 3*16); + __m256i phl = _mm256_permute2x128_si256(p01h, p23h, 0 + 2*16); + __m256i phh = _mm256_permute2x128_si256(p01h, p23h, 1 + 3*16); + + __m256i b0 = _mm256_unpacklo_epi32(pll, plh); + __m256i g0 = _mm256_unpackhi_epi32(pll, plh); + __m256i r0 = _mm256_unpacklo_epi32(phl, phh); + __m256i a0 = _mm256_unpackhi_epi32(phl, phh); + + a = v_uint8x32(b0); + b = v_uint8x32(g0); + c = v_uint8x32(r0); + d = v_uint8x32(a0); +} + +inline void v_load_deinterleave( const ushort* ptr, v_uint16x16& a, v_uint16x16& b, v_uint16x16& c, v_uint16x16& d ) +{ + __m256i bgr0 = _mm256_loadu_si256((const __m256i*)ptr); + __m256i bgr1 = _mm256_loadu_si256((const __m256i*)(ptr + 16)); + __m256i bgr2 = _mm256_loadu_si256((const __m256i*)(ptr + 32)); + __m256i bgr3 = _mm256_loadu_si256((const __m256i*)(ptr + 48)); + const __m256i sh = _mm256_setr_epi8(0, 1, 8, 9, 2, 3, 10, 11, 4, 5, 12, 13, 6, 7, 14, 15, + 0, 1, 8, 9, 2, 3, 10, 11, 4, 5, 12, 13, 6, 7, 14, 15); + __m256i p0 = _mm256_shuffle_epi8(bgr0, sh); + __m256i p1 = _mm256_shuffle_epi8(bgr1, sh); + __m256i p2 = _mm256_shuffle_epi8(bgr2, sh); + __m256i p3 = _mm256_shuffle_epi8(bgr3, sh); + + __m256i p01l = _mm256_unpacklo_epi32(p0, p1); + __m256i p01h = _mm256_unpackhi_epi32(p0, p1); + __m256i p23l = _mm256_unpacklo_epi32(p2, p3); + __m256i p23h = _mm256_unpackhi_epi32(p2, p3); + + __m256i pll = _mm256_permute2x128_si256(p01l, p23l, 0 + 2*16); + __m256i plh = _mm256_permute2x128_si256(p01l, p23l, 1 + 3*16); + __m256i phl = _mm256_permute2x128_si256(p01h, p23h, 0 + 2*16); + __m256i phh = _mm256_permute2x128_si256(p01h, p23h, 1 + 3*16); + + __m256i b0 = _mm256_unpacklo_epi32(pll, plh); + __m256i g0 = _mm256_unpackhi_epi32(pll, plh); + __m256i r0 = _mm256_unpacklo_epi32(phl, phh); + __m256i a0 = _mm256_unpackhi_epi32(phl, phh); + + a = v_uint16x16(b0); + b = v_uint16x16(g0); + c = v_uint16x16(r0); + d = v_uint16x16(a0); +} + +inline void v_load_deinterleave( const unsigned* ptr, v_uint32x8& a, v_uint32x8& b, v_uint32x8& c, v_uint32x8& d ) +{ + __m256i p0 = _mm256_loadu_si256((const __m256i*)ptr); + __m256i p1 = _mm256_loadu_si256((const __m256i*)(ptr + 8)); + __m256i p2 = _mm256_loadu_si256((const __m256i*)(ptr + 16)); + __m256i p3 = _mm256_loadu_si256((const __m256i*)(ptr + 24)); + + __m256i p01l = _mm256_unpacklo_epi32(p0, p1); + __m256i p01h = _mm256_unpackhi_epi32(p0, p1); + __m256i p23l = _mm256_unpacklo_epi32(p2, p3); + __m256i p23h = _mm256_unpackhi_epi32(p2, p3); + + __m256i pll = _mm256_permute2x128_si256(p01l, p23l, 0 + 2*16); + __m256i plh = _mm256_permute2x128_si256(p01l, p23l, 1 + 3*16); + __m256i phl = _mm256_permute2x128_si256(p01h, p23h, 0 + 2*16); + __m256i phh = _mm256_permute2x128_si256(p01h, p23h, 1 + 3*16); + + __m256i b0 = _mm256_unpacklo_epi32(pll, plh); + __m256i g0 = _mm256_unpackhi_epi32(pll, plh); + __m256i r0 = _mm256_unpacklo_epi32(phl, phh); + __m256i a0 = _mm256_unpackhi_epi32(phl, phh); + + a = v_uint32x8(b0); + b = v_uint32x8(g0); + c = v_uint32x8(r0); + d = v_uint32x8(a0); +} + +inline void v_load_deinterleave( const uint64* ptr, v_uint64x4& a, v_uint64x4& b, v_uint64x4& c, v_uint64x4& d ) +{ + __m256i bgra0 = _mm256_loadu_si256((const __m256i*)ptr); + __m256i bgra1 = _mm256_loadu_si256((const __m256i*)(ptr + 4)); + __m256i bgra2 = _mm256_loadu_si256((const __m256i*)(ptr + 8)); + __m256i bgra3 = _mm256_loadu_si256((const __m256i*)(ptr + 12)); + + __m256i l02 = _mm256_permute2x128_si256(bgra0, bgra2, 0 + 2*16); + __m256i h02 = _mm256_permute2x128_si256(bgra0, bgra2, 1 + 3*16); + __m256i l13 = _mm256_permute2x128_si256(bgra1, bgra3, 0 + 2*16); + __m256i h13 = _mm256_permute2x128_si256(bgra1, bgra3, 1 + 3*16); + + __m256i b0 = _mm256_unpacklo_epi64(l02, l13); + __m256i g0 = _mm256_unpackhi_epi64(l02, l13); + __m256i r0 = _mm256_unpacklo_epi64(h02, h13); + __m256i a0 = _mm256_unpackhi_epi64(h02, h13); + + a = v_uint64x4(b0); + b = v_uint64x4(g0); + c = v_uint64x4(r0); + d = v_uint64x4(a0); +} + +///////////////////////////// store interleave ///////////////////////////////////// + +inline void v_store_interleave( uchar* ptr, const v_uint8x32& x, const v_uint8x32& y, + hal::StoreMode mode=hal::STORE_UNALIGNED ) +{ + __m256i xy_l = _mm256_unpacklo_epi8(x.val, y.val); + __m256i xy_h = _mm256_unpackhi_epi8(x.val, y.val); + + __m256i xy0 = _mm256_permute2x128_si256(xy_l, xy_h, 0 + 2*16); + __m256i xy1 = _mm256_permute2x128_si256(xy_l, xy_h, 1 + 3*16); + + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm256_stream_si256((__m256i*)ptr, xy0); + _mm256_stream_si256((__m256i*)(ptr + 32), xy1); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm256_store_si256((__m256i*)ptr, xy0); + _mm256_store_si256((__m256i*)(ptr + 32), xy1); + } + else + { + _mm256_storeu_si256((__m256i*)ptr, xy0); + _mm256_storeu_si256((__m256i*)(ptr + 32), xy1); + } +} + +inline void v_store_interleave( ushort* ptr, const v_uint16x16& x, const v_uint16x16& y, + hal::StoreMode mode=hal::STORE_UNALIGNED ) +{ + __m256i xy_l = _mm256_unpacklo_epi16(x.val, y.val); + __m256i xy_h = _mm256_unpackhi_epi16(x.val, y.val); + + __m256i xy0 = _mm256_permute2x128_si256(xy_l, xy_h, 0 + 2*16); + __m256i xy1 = _mm256_permute2x128_si256(xy_l, xy_h, 1 + 3*16); + + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm256_stream_si256((__m256i*)ptr, xy0); + _mm256_stream_si256((__m256i*)(ptr + 16), xy1); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm256_store_si256((__m256i*)ptr, xy0); + _mm256_store_si256((__m256i*)(ptr + 16), xy1); + } + else + { + _mm256_storeu_si256((__m256i*)ptr, xy0); + _mm256_storeu_si256((__m256i*)(ptr + 16), xy1); + } +} + +inline void v_store_interleave( unsigned* ptr, const v_uint32x8& x, const v_uint32x8& y, + hal::StoreMode mode=hal::STORE_UNALIGNED ) +{ + __m256i xy_l = _mm256_unpacklo_epi32(x.val, y.val); + __m256i xy_h = _mm256_unpackhi_epi32(x.val, y.val); + + __m256i xy0 = _mm256_permute2x128_si256(xy_l, xy_h, 0 + 2*16); + __m256i xy1 = _mm256_permute2x128_si256(xy_l, xy_h, 1 + 3*16); + + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm256_stream_si256((__m256i*)ptr, xy0); + _mm256_stream_si256((__m256i*)(ptr + 8), xy1); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm256_store_si256((__m256i*)ptr, xy0); + _mm256_store_si256((__m256i*)(ptr + 8), xy1); + } + else + { + _mm256_storeu_si256((__m256i*)ptr, xy0); + _mm256_storeu_si256((__m256i*)(ptr + 8), xy1); + } +} + +inline void v_store_interleave( uint64* ptr, const v_uint64x4& x, const v_uint64x4& y, + hal::StoreMode mode=hal::STORE_UNALIGNED ) +{ + __m256i xy_l = _mm256_unpacklo_epi64(x.val, y.val); + __m256i xy_h = _mm256_unpackhi_epi64(x.val, y.val); + + __m256i xy0 = _mm256_permute2x128_si256(xy_l, xy_h, 0 + 2*16); + __m256i xy1 = _mm256_permute2x128_si256(xy_l, xy_h, 1 + 3*16); + + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm256_stream_si256((__m256i*)ptr, xy0); + _mm256_stream_si256((__m256i*)(ptr + 4), xy1); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm256_store_si256((__m256i*)ptr, xy0); + _mm256_store_si256((__m256i*)(ptr + 4), xy1); + } + else + { + _mm256_storeu_si256((__m256i*)ptr, xy0); + _mm256_storeu_si256((__m256i*)(ptr + 4), xy1); + } +} + +inline void v_store_interleave( uchar* ptr, const v_uint8x32& a, const v_uint8x32& b, const v_uint8x32& c, + hal::StoreMode mode=hal::STORE_UNALIGNED ) +{ + const __m256i sh_b = _mm256_setr_epi8( + 0, 11, 6, 1, 12, 7, 2, 13, 8, 3, 14, 9, 4, 15, 10, 5, + 0, 11, 6, 1, 12, 7, 2, 13, 8, 3, 14, 9, 4, 15, 10, 5); + const __m256i sh_g = _mm256_setr_epi8( + 5, 0, 11, 6, 1, 12, 7, 2, 13, 8, 3, 14, 9, 4, 15, 10, + 5, 0, 11, 6, 1, 12, 7, 2, 13, 8, 3, 14, 9, 4, 15, 10); + const __m256i sh_r = _mm256_setr_epi8( + 10, 5, 0, 11, 6, 1, 12, 7, 2, 13, 8, 3, 14, 9, 4, 15, + 10, 5, 0, 11, 6, 1, 12, 7, 2, 13, 8, 3, 14, 9, 4, 15); + + __m256i b0 = _mm256_shuffle_epi8(a.val, sh_b); + __m256i g0 = _mm256_shuffle_epi8(b.val, sh_g); + __m256i r0 = _mm256_shuffle_epi8(c.val, sh_r); + + const __m256i m0 = _mm256_setr_epi8(0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, + 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0); + const __m256i m1 = _mm256_setr_epi8(0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, + 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0); + + __m256i p0 = _mm256_blendv_epi8(_mm256_blendv_epi8(b0, g0, m0), r0, m1); + __m256i p1 = _mm256_blendv_epi8(_mm256_blendv_epi8(g0, r0, m0), b0, m1); + __m256i p2 = _mm256_blendv_epi8(_mm256_blendv_epi8(r0, b0, m0), g0, m1); + + __m256i bgr0 = _mm256_permute2x128_si256(p0, p1, 0 + 2*16); + __m256i bgr1 = _mm256_permute2x128_si256(p2, p0, 0 + 3*16); + __m256i bgr2 = _mm256_permute2x128_si256(p1, p2, 1 + 3*16); + + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm256_stream_si256((__m256i*)ptr, bgr0); + _mm256_stream_si256((__m256i*)(ptr + 32), bgr1); + _mm256_stream_si256((__m256i*)(ptr + 64), bgr2); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm256_store_si256((__m256i*)ptr, bgr0); + _mm256_store_si256((__m256i*)(ptr + 32), bgr1); + _mm256_store_si256((__m256i*)(ptr + 64), bgr2); + } + else + { + _mm256_storeu_si256((__m256i*)ptr, bgr0); + _mm256_storeu_si256((__m256i*)(ptr + 32), bgr1); + _mm256_storeu_si256((__m256i*)(ptr + 64), bgr2); + } +} + +inline void v_store_interleave( ushort* ptr, const v_uint16x16& a, const v_uint16x16& b, const v_uint16x16& c, + hal::StoreMode mode=hal::STORE_UNALIGNED ) +{ + const __m256i sh_b = _mm256_setr_epi8( + 0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15, 4, 5, 10, 11, + 0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15, 4, 5, 10, 11); + const __m256i sh_g = _mm256_setr_epi8( + 10, 11, 0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15, 4, 5, + 10, 11, 0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15, 4, 5); + const __m256i sh_r = _mm256_setr_epi8( + 4, 5, 10, 11, 0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15, + 4, 5, 10, 11, 0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15); + + __m256i b0 = _mm256_shuffle_epi8(a.val, sh_b); + __m256i g0 = _mm256_shuffle_epi8(b.val, sh_g); + __m256i r0 = _mm256_shuffle_epi8(c.val, sh_r); + + const __m256i m0 = _mm256_setr_epi8(0, 0, -1, -1, 0, 0, 0, 0, -1, -1, 0, 0, 0, 0, -1, -1, + 0, 0, 0, 0, -1, -1, 0, 0, 0, 0, -1, -1, 0, 0, 0, 0); + const __m256i m1 = _mm256_setr_epi8(0, 0, 0, 0, -1, -1, 0, 0, 0, 0, -1, -1, 0, 0, 0, 0, + -1, -1, 0, 0, 0, 0, -1, -1, 0, 0, 0, 0, -1, -1, 0, 0); + + __m256i p0 = _mm256_blendv_epi8(_mm256_blendv_epi8(b0, g0, m0), r0, m1); + __m256i p1 = _mm256_blendv_epi8(_mm256_blendv_epi8(g0, r0, m0), b0, m1); + __m256i p2 = _mm256_blendv_epi8(_mm256_blendv_epi8(r0, b0, m0), g0, m1); + + __m256i bgr0 = _mm256_permute2x128_si256(p0, p2, 0 + 2*16); + //__m256i bgr1 = p1; + __m256i bgr2 = _mm256_permute2x128_si256(p0, p2, 1 + 3*16); + + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm256_stream_si256((__m256i*)ptr, bgr0); + _mm256_stream_si256((__m256i*)(ptr + 16), p1); + _mm256_stream_si256((__m256i*)(ptr + 32), bgr2); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm256_store_si256((__m256i*)ptr, bgr0); + _mm256_store_si256((__m256i*)(ptr + 16), p1); + _mm256_store_si256((__m256i*)(ptr + 32), bgr2); + } + else + { + _mm256_storeu_si256((__m256i*)ptr, bgr0); + _mm256_storeu_si256((__m256i*)(ptr + 16), p1); + _mm256_storeu_si256((__m256i*)(ptr + 32), bgr2); + } +} + +inline void v_store_interleave( unsigned* ptr, const v_uint32x8& a, const v_uint32x8& b, const v_uint32x8& c, + hal::StoreMode mode=hal::STORE_UNALIGNED ) +{ + __m256i b0 = _mm256_shuffle_epi32(a.val, 0x6c); + __m256i g0 = _mm256_shuffle_epi32(b.val, 0xb1); + __m256i r0 = _mm256_shuffle_epi32(c.val, 0xc6); + + __m256i p0 = _mm256_blend_epi32(_mm256_blend_epi32(b0, g0, 0x92), r0, 0x24); + __m256i p1 = _mm256_blend_epi32(_mm256_blend_epi32(g0, r0, 0x92), b0, 0x24); + __m256i p2 = _mm256_blend_epi32(_mm256_blend_epi32(r0, b0, 0x92), g0, 0x24); + + __m256i bgr0 = _mm256_permute2x128_si256(p0, p1, 0 + 2*16); + //__m256i bgr1 = p2; + __m256i bgr2 = _mm256_permute2x128_si256(p0, p1, 1 + 3*16); + + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm256_stream_si256((__m256i*)ptr, bgr0); + _mm256_stream_si256((__m256i*)(ptr + 8), p2); + _mm256_stream_si256((__m256i*)(ptr + 16), bgr2); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm256_store_si256((__m256i*)ptr, bgr0); + _mm256_store_si256((__m256i*)(ptr + 8), p2); + _mm256_store_si256((__m256i*)(ptr + 16), bgr2); + } + else + { + _mm256_storeu_si256((__m256i*)ptr, bgr0); + _mm256_storeu_si256((__m256i*)(ptr + 8), p2); + _mm256_storeu_si256((__m256i*)(ptr + 16), bgr2); + } +} + +inline void v_store_interleave( uint64* ptr, const v_uint64x4& a, const v_uint64x4& b, const v_uint64x4& c, + hal::StoreMode mode=hal::STORE_UNALIGNED ) +{ + __m256i s01 = _mm256_unpacklo_epi64(a.val, b.val); + __m256i s12 = _mm256_unpackhi_epi64(b.val, c.val); + __m256i s20 = _mm256_blend_epi32(c.val, a.val, 0xcc); + + __m256i bgr0 = _mm256_permute2x128_si256(s01, s20, 0 + 2*16); + __m256i bgr1 = _mm256_blend_epi32(s01, s12, 0x0f); + __m256i bgr2 = _mm256_permute2x128_si256(s20, s12, 1 + 3*16); + + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm256_stream_si256((__m256i*)ptr, bgr0); + _mm256_stream_si256((__m256i*)(ptr + 4), bgr1); + _mm256_stream_si256((__m256i*)(ptr + 8), bgr2); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm256_store_si256((__m256i*)ptr, bgr0); + _mm256_store_si256((__m256i*)(ptr + 4), bgr1); + _mm256_store_si256((__m256i*)(ptr + 8), bgr2); + } + else + { + _mm256_storeu_si256((__m256i*)ptr, bgr0); + _mm256_storeu_si256((__m256i*)(ptr + 4), bgr1); + _mm256_storeu_si256((__m256i*)(ptr + 8), bgr2); + } +} + +inline void v_store_interleave( uchar* ptr, const v_uint8x32& a, const v_uint8x32& b, + const v_uint8x32& c, const v_uint8x32& d, + hal::StoreMode mode=hal::STORE_UNALIGNED ) +{ + __m256i bg0 = _mm256_unpacklo_epi8(a.val, b.val); + __m256i bg1 = _mm256_unpackhi_epi8(a.val, b.val); + __m256i ra0 = _mm256_unpacklo_epi8(c.val, d.val); + __m256i ra1 = _mm256_unpackhi_epi8(c.val, d.val); + + __m256i bgra0_ = _mm256_unpacklo_epi16(bg0, ra0); + __m256i bgra1_ = _mm256_unpackhi_epi16(bg0, ra0); + __m256i bgra2_ = _mm256_unpacklo_epi16(bg1, ra1); + __m256i bgra3_ = _mm256_unpackhi_epi16(bg1, ra1); + + __m256i bgra0 = _mm256_permute2x128_si256(bgra0_, bgra1_, 0 + 2*16); + __m256i bgra2 = _mm256_permute2x128_si256(bgra0_, bgra1_, 1 + 3*16); + __m256i bgra1 = _mm256_permute2x128_si256(bgra2_, bgra3_, 0 + 2*16); + __m256i bgra3 = _mm256_permute2x128_si256(bgra2_, bgra3_, 1 + 3*16); + + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm256_stream_si256((__m256i*)ptr, bgra0); + _mm256_stream_si256((__m256i*)(ptr + 32), bgra1); + _mm256_stream_si256((__m256i*)(ptr + 64), bgra2); + _mm256_stream_si256((__m256i*)(ptr + 96), bgra3); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm256_store_si256((__m256i*)ptr, bgra0); + _mm256_store_si256((__m256i*)(ptr + 32), bgra1); + _mm256_store_si256((__m256i*)(ptr + 64), bgra2); + _mm256_store_si256((__m256i*)(ptr + 96), bgra3); + } + else + { + _mm256_storeu_si256((__m256i*)ptr, bgra0); + _mm256_storeu_si256((__m256i*)(ptr + 32), bgra1); + _mm256_storeu_si256((__m256i*)(ptr + 64), bgra2); + _mm256_storeu_si256((__m256i*)(ptr + 96), bgra3); + } +} + +inline void v_store_interleave( ushort* ptr, const v_uint16x16& a, const v_uint16x16& b, + const v_uint16x16& c, const v_uint16x16& d, + hal::StoreMode mode=hal::STORE_UNALIGNED ) +{ + __m256i bg0 = _mm256_unpacklo_epi16(a.val, b.val); + __m256i bg1 = _mm256_unpackhi_epi16(a.val, b.val); + __m256i ra0 = _mm256_unpacklo_epi16(c.val, d.val); + __m256i ra1 = _mm256_unpackhi_epi16(c.val, d.val); + + __m256i bgra0_ = _mm256_unpacklo_epi32(bg0, ra0); + __m256i bgra1_ = _mm256_unpackhi_epi32(bg0, ra0); + __m256i bgra2_ = _mm256_unpacklo_epi32(bg1, ra1); + __m256i bgra3_ = _mm256_unpackhi_epi32(bg1, ra1); + + __m256i bgra0 = _mm256_permute2x128_si256(bgra0_, bgra1_, 0 + 2*16); + __m256i bgra2 = _mm256_permute2x128_si256(bgra0_, bgra1_, 1 + 3*16); + __m256i bgra1 = _mm256_permute2x128_si256(bgra2_, bgra3_, 0 + 2*16); + __m256i bgra3 = _mm256_permute2x128_si256(bgra2_, bgra3_, 1 + 3*16); + + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm256_stream_si256((__m256i*)ptr, bgra0); + _mm256_stream_si256((__m256i*)(ptr + 16), bgra1); + _mm256_stream_si256((__m256i*)(ptr + 32), bgra2); + _mm256_stream_si256((__m256i*)(ptr + 48), bgra3); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm256_store_si256((__m256i*)ptr, bgra0); + _mm256_store_si256((__m256i*)(ptr + 16), bgra1); + _mm256_store_si256((__m256i*)(ptr + 32), bgra2); + _mm256_store_si256((__m256i*)(ptr + 48), bgra3); + } + else + { + _mm256_storeu_si256((__m256i*)ptr, bgra0); + _mm256_storeu_si256((__m256i*)(ptr + 16), bgra1); + _mm256_storeu_si256((__m256i*)(ptr + 32), bgra2); + _mm256_storeu_si256((__m256i*)(ptr + 48), bgra3); + } +} + +inline void v_store_interleave( unsigned* ptr, const v_uint32x8& a, const v_uint32x8& b, + const v_uint32x8& c, const v_uint32x8& d, + hal::StoreMode mode=hal::STORE_UNALIGNED ) +{ + __m256i bg0 = _mm256_unpacklo_epi32(a.val, b.val); + __m256i bg1 = _mm256_unpackhi_epi32(a.val, b.val); + __m256i ra0 = _mm256_unpacklo_epi32(c.val, d.val); + __m256i ra1 = _mm256_unpackhi_epi32(c.val, d.val); + + __m256i bgra0_ = _mm256_unpacklo_epi64(bg0, ra0); + __m256i bgra1_ = _mm256_unpackhi_epi64(bg0, ra0); + __m256i bgra2_ = _mm256_unpacklo_epi64(bg1, ra1); + __m256i bgra3_ = _mm256_unpackhi_epi64(bg1, ra1); + + __m256i bgra0 = _mm256_permute2x128_si256(bgra0_, bgra1_, 0 + 2*16); + __m256i bgra2 = _mm256_permute2x128_si256(bgra0_, bgra1_, 1 + 3*16); + __m256i bgra1 = _mm256_permute2x128_si256(bgra2_, bgra3_, 0 + 2*16); + __m256i bgra3 = _mm256_permute2x128_si256(bgra2_, bgra3_, 1 + 3*16); + + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm256_stream_si256((__m256i*)ptr, bgra0); + _mm256_stream_si256((__m256i*)(ptr + 8), bgra1); + _mm256_stream_si256((__m256i*)(ptr + 16), bgra2); + _mm256_stream_si256((__m256i*)(ptr + 24), bgra3); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm256_store_si256((__m256i*)ptr, bgra0); + _mm256_store_si256((__m256i*)(ptr + 8), bgra1); + _mm256_store_si256((__m256i*)(ptr + 16), bgra2); + _mm256_store_si256((__m256i*)(ptr + 24), bgra3); + } + else + { + _mm256_storeu_si256((__m256i*)ptr, bgra0); + _mm256_storeu_si256((__m256i*)(ptr + 8), bgra1); + _mm256_storeu_si256((__m256i*)(ptr + 16), bgra2); + _mm256_storeu_si256((__m256i*)(ptr + 24), bgra3); + } +} + +inline void v_store_interleave( uint64* ptr, const v_uint64x4& a, const v_uint64x4& b, + const v_uint64x4& c, const v_uint64x4& d, + hal::StoreMode mode=hal::STORE_UNALIGNED ) +{ + __m256i bg0 = _mm256_unpacklo_epi64(a.val, b.val); + __m256i bg1 = _mm256_unpackhi_epi64(a.val, b.val); + __m256i ra0 = _mm256_unpacklo_epi64(c.val, d.val); + __m256i ra1 = _mm256_unpackhi_epi64(c.val, d.val); + + __m256i bgra0 = _mm256_permute2x128_si256(bg0, ra0, 0 + 2*16); + __m256i bgra1 = _mm256_permute2x128_si256(bg1, ra1, 0 + 2*16); + __m256i bgra2 = _mm256_permute2x128_si256(bg0, ra0, 1 + 3*16); + __m256i bgra3 = _mm256_permute2x128_si256(bg1, ra1, 1 + 3*16); + + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm256_stream_si256((__m256i*)ptr, bgra0); + _mm256_stream_si256((__m256i*)(ptr + 4), bgra1); + _mm256_stream_si256((__m256i*)(ptr + 8), bgra2); + _mm256_stream_si256((__m256i*)(ptr + 12), bgra3); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm256_store_si256((__m256i*)ptr, bgra0); + _mm256_store_si256((__m256i*)(ptr + 4), bgra1); + _mm256_store_si256((__m256i*)(ptr + 8), bgra2); + _mm256_store_si256((__m256i*)(ptr + 12), bgra3); + } + else + { + _mm256_storeu_si256((__m256i*)ptr, bgra0); + _mm256_storeu_si256((__m256i*)(ptr + 4), bgra1); + _mm256_storeu_si256((__m256i*)(ptr + 8), bgra2); + _mm256_storeu_si256((__m256i*)(ptr + 12), bgra3); + } +} + +#define OPENCV_HAL_IMPL_AVX_LOADSTORE_INTERLEAVE(_Tpvec0, _Tp0, suffix0, _Tpvec1, _Tp1, suffix1) \ +inline void v_load_deinterleave( const _Tp0* ptr, _Tpvec0& a0, _Tpvec0& b0 ) \ +{ \ + _Tpvec1 a1, b1; \ + v_load_deinterleave((const _Tp1*)ptr, a1, b1); \ + a0 = v_reinterpret_as_##suffix0(a1); \ + b0 = v_reinterpret_as_##suffix0(b1); \ +} \ +inline void v_load_deinterleave( const _Tp0* ptr, _Tpvec0& a0, _Tpvec0& b0, _Tpvec0& c0 ) \ +{ \ + _Tpvec1 a1, b1, c1; \ + v_load_deinterleave((const _Tp1*)ptr, a1, b1, c1); \ + a0 = v_reinterpret_as_##suffix0(a1); \ + b0 = v_reinterpret_as_##suffix0(b1); \ + c0 = v_reinterpret_as_##suffix0(c1); \ +} \ +inline void v_load_deinterleave( const _Tp0* ptr, _Tpvec0& a0, _Tpvec0& b0, _Tpvec0& c0, _Tpvec0& d0 ) \ +{ \ + _Tpvec1 a1, b1, c1, d1; \ + v_load_deinterleave((const _Tp1*)ptr, a1, b1, c1, d1); \ + a0 = v_reinterpret_as_##suffix0(a1); \ + b0 = v_reinterpret_as_##suffix0(b1); \ + c0 = v_reinterpret_as_##suffix0(c1); \ + d0 = v_reinterpret_as_##suffix0(d1); \ +} \ +inline void v_store_interleave( _Tp0* ptr, const _Tpvec0& a0, const _Tpvec0& b0, \ + hal::StoreMode mode=hal::STORE_UNALIGNED ) \ +{ \ + _Tpvec1 a1 = v_reinterpret_as_##suffix1(a0); \ + _Tpvec1 b1 = v_reinterpret_as_##suffix1(b0); \ + v_store_interleave((_Tp1*)ptr, a1, b1, mode); \ +} \ +inline void v_store_interleave( _Tp0* ptr, const _Tpvec0& a0, const _Tpvec0& b0, const _Tpvec0& c0, \ + hal::StoreMode mode=hal::STORE_UNALIGNED ) \ +{ \ + _Tpvec1 a1 = v_reinterpret_as_##suffix1(a0); \ + _Tpvec1 b1 = v_reinterpret_as_##suffix1(b0); \ + _Tpvec1 c1 = v_reinterpret_as_##suffix1(c0); \ + v_store_interleave((_Tp1*)ptr, a1, b1, c1, mode); \ +} \ +inline void v_store_interleave( _Tp0* ptr, const _Tpvec0& a0, const _Tpvec0& b0, \ + const _Tpvec0& c0, const _Tpvec0& d0, \ + hal::StoreMode mode=hal::STORE_UNALIGNED ) \ +{ \ + _Tpvec1 a1 = v_reinterpret_as_##suffix1(a0); \ + _Tpvec1 b1 = v_reinterpret_as_##suffix1(b0); \ + _Tpvec1 c1 = v_reinterpret_as_##suffix1(c0); \ + _Tpvec1 d1 = v_reinterpret_as_##suffix1(d0); \ + v_store_interleave((_Tp1*)ptr, a1, b1, c1, d1, mode); \ +} + +OPENCV_HAL_IMPL_AVX_LOADSTORE_INTERLEAVE(v_int8x32, schar, s8, v_uint8x32, uchar, u8) +OPENCV_HAL_IMPL_AVX_LOADSTORE_INTERLEAVE(v_int16x16, short, s16, v_uint16x16, ushort, u16) +OPENCV_HAL_IMPL_AVX_LOADSTORE_INTERLEAVE(v_int32x8, int, s32, v_uint32x8, unsigned, u32) +OPENCV_HAL_IMPL_AVX_LOADSTORE_INTERLEAVE(v_float32x8, float, f32, v_uint32x8, unsigned, u32) +OPENCV_HAL_IMPL_AVX_LOADSTORE_INTERLEAVE(v_int64x4, int64, s64, v_uint64x4, uint64, u64) +OPENCV_HAL_IMPL_AVX_LOADSTORE_INTERLEAVE(v_float64x4, double, f64, v_uint64x4, uint64, u64) + +// +// FP16 +// + +inline v_float32x8 v256_load_expand(const float16_t* ptr) +{ +#if CV_FP16 + return v_float32x8(_mm256_cvtph_ps(_mm_loadu_si128((const __m128i*)ptr))); +#else + float CV_DECL_ALIGNED(32) buf[8]; + for (int i = 0; i < 8; i++) + buf[i] = (float)ptr[i]; + return v256_load_aligned(buf); +#endif +} + +inline void v_pack_store(float16_t* ptr, const v_float32x8& a) +{ +#if CV_FP16 + __m128i ah = _mm256_cvtps_ph(a.val, 0); + _mm_storeu_si128((__m128i*)ptr, ah); +#else + float CV_DECL_ALIGNED(32) buf[8]; + v_store_aligned(buf, a); + for (int i = 0; i < 8; i++) + ptr[i] = float16_t(buf[i]); +#endif +} + +// +// end of FP16 +// + +inline void v256_cleanup() { _mm256_zeroall(); } + +CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END + +//! @endcond + +} // cv:: + +#endif // OPENCV_HAL_INTRIN_AVX_HPP diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/hal/intrin_avx512.hpp b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/hal/intrin_avx512.hpp new file mode 100644 index 0000000..75a3bd4 --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/hal/intrin_avx512.hpp @@ -0,0 +1,3078 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html + +#ifndef OPENCV_HAL_INTRIN_AVX512_HPP +#define OPENCV_HAL_INTRIN_AVX512_HPP + +#if defined(_MSC_VER) && (_MSC_VER < 1920/*MSVS2019*/) +# pragma warning(disable:4146) // unary minus operator applied to unsigned type, result still unsigned +# pragma warning(disable:4309) // 'argument': truncation of constant value +# pragma warning(disable:4310) // cast truncates constant value +#endif + +#define CVT_ROUND_MODES_IMPLEMENTED 0 + +#define CV_SIMD512 1 +#define CV_SIMD512_64F 1 +#define CV_SIMD512_FP16 0 // no native operations with FP16 type. Only load/store from float32x8 are available (if CV_FP16 == 1) + +#define _v512_set_epu64(a7, a6, a5, a4, a3, a2, a1, a0) _mm512_set_epi64((int64)(a7),(int64)(a6),(int64)(a5),(int64)(a4),(int64)(a3),(int64)(a2),(int64)(a1),(int64)(a0)) +#define _v512_set_epu32(a15, a14, a13, a12, a11, a10, a9, a8, a7, a6, a5, a4, a3, a2, a1, a0) \ + _mm512_set_epi64(((int64)(a15)<<32)|(int64)(a14), ((int64)(a13)<<32)|(int64)(a12), ((int64)(a11)<<32)|(int64)(a10), ((int64)( a9)<<32)|(int64)( a8), \ + ((int64)( a7)<<32)|(int64)( a6), ((int64)( a5)<<32)|(int64)( a4), ((int64)( a3)<<32)|(int64)( a2), ((int64)( a1)<<32)|(int64)( a0)) +#define _v512_set_epu16(a31, a30, a29, a28, a27, a26, a25, a24, a23, a22, a21, a20, a19, a18, a17, a16, \ + a15, a14, a13, a12, a11, a10, a9, a8, a7, a6, a5, a4, a3, a2, a1, a0) \ + _v512_set_epu32(((unsigned)(a31)<<16)|(unsigned)(a30), ((unsigned)(a29)<<16)|(unsigned)(a28), ((unsigned)(a27)<<16)|(unsigned)(a26), ((unsigned)(a25)<<16)|(unsigned)(a24), \ + ((unsigned)(a23)<<16)|(unsigned)(a22), ((unsigned)(a21)<<16)|(unsigned)(a20), ((unsigned)(a19)<<16)|(unsigned)(a18), ((unsigned)(a17)<<16)|(unsigned)(a16), \ + ((unsigned)(a15)<<16)|(unsigned)(a14), ((unsigned)(a13)<<16)|(unsigned)(a12), ((unsigned)(a11)<<16)|(unsigned)(a10), ((unsigned)( a9)<<16)|(unsigned)( a8), \ + ((unsigned)( a7)<<16)|(unsigned)( a6), ((unsigned)( a5)<<16)|(unsigned)( a4), ((unsigned)( a3)<<16)|(unsigned)( a2), ((unsigned)( a1)<<16)|(unsigned)( a0)) +#define _v512_set_epu8(a63, a62, a61, a60, a59, a58, a57, a56, a55, a54, a53, a52, a51, a50, a49, a48, \ + a47, a46, a45, a44, a43, a42, a41, a40, a39, a38, a37, a36, a35, a34, a33, a32, \ + a31, a30, a29, a28, a27, a26, a25, a24, a23, a22, a21, a20, a19, a18, a17, a16, \ + a15, a14, a13, a12, a11, a10, a9, a8, a7, a6, a5, a4, a3, a2, a1, a0) \ + _v512_set_epu32(((unsigned)(a63)<<24)|((unsigned)(a62)<<16)|((unsigned)(a61)<<8)|(unsigned)(a60),((unsigned)(a59)<<24)|((unsigned)(a58)<<16)|((unsigned)(a57)<<8)|(unsigned)(a56), \ + ((unsigned)(a55)<<24)|((unsigned)(a54)<<16)|((unsigned)(a53)<<8)|(unsigned)(a52),((unsigned)(a51)<<24)|((unsigned)(a50)<<16)|((unsigned)(a49)<<8)|(unsigned)(a48), \ + ((unsigned)(a47)<<24)|((unsigned)(a46)<<16)|((unsigned)(a45)<<8)|(unsigned)(a44),((unsigned)(a43)<<24)|((unsigned)(a42)<<16)|((unsigned)(a41)<<8)|(unsigned)(a40), \ + ((unsigned)(a39)<<24)|((unsigned)(a38)<<16)|((unsigned)(a37)<<8)|(unsigned)(a36),((unsigned)(a35)<<24)|((unsigned)(a34)<<16)|((unsigned)(a33)<<8)|(unsigned)(a32), \ + ((unsigned)(a31)<<24)|((unsigned)(a30)<<16)|((unsigned)(a29)<<8)|(unsigned)(a28),((unsigned)(a27)<<24)|((unsigned)(a26)<<16)|((unsigned)(a25)<<8)|(unsigned)(a24), \ + ((unsigned)(a23)<<24)|((unsigned)(a22)<<16)|((unsigned)(a21)<<8)|(unsigned)(a20),((unsigned)(a19)<<24)|((unsigned)(a18)<<16)|((unsigned)(a17)<<8)|(unsigned)(a16), \ + ((unsigned)(a15)<<24)|((unsigned)(a14)<<16)|((unsigned)(a13)<<8)|(unsigned)(a12),((unsigned)(a11)<<24)|((unsigned)(a10)<<16)|((unsigned)( a9)<<8)|(unsigned)( a8), \ + ((unsigned)( a7)<<24)|((unsigned)( a6)<<16)|((unsigned)( a5)<<8)|(unsigned)( a4),((unsigned)( a3)<<24)|((unsigned)( a2)<<16)|((unsigned)( a1)<<8)|(unsigned)( a0)) +#define _v512_set_epi8(a63, a62, a61, a60, a59, a58, a57, a56, a55, a54, a53, a52, a51, a50, a49, a48, \ + a47, a46, a45, a44, a43, a42, a41, a40, a39, a38, a37, a36, a35, a34, a33, a32, \ + a31, a30, a29, a28, a27, a26, a25, a24, a23, a22, a21, a20, a19, a18, a17, a16, \ + a15, a14, a13, a12, a11, a10, a9, a8, a7, a6, a5, a4, a3, a2, a1, a0) \ + _v512_set_epu8((uchar)(a63), (uchar)(a62), (uchar)(a61), (uchar)(a60), (uchar)(a59), (uchar)(a58), (uchar)(a57), (uchar)(a56), \ + (uchar)(a55), (uchar)(a54), (uchar)(a53), (uchar)(a52), (uchar)(a51), (uchar)(a50), (uchar)(a49), (uchar)(a48), \ + (uchar)(a47), (uchar)(a46), (uchar)(a45), (uchar)(a44), (uchar)(a43), (uchar)(a42), (uchar)(a41), (uchar)(a40), \ + (uchar)(a39), (uchar)(a38), (uchar)(a37), (uchar)(a36), (uchar)(a35), (uchar)(a34), (uchar)(a33), (uchar)(a32), \ + (uchar)(a31), (uchar)(a30), (uchar)(a29), (uchar)(a28), (uchar)(a27), (uchar)(a26), (uchar)(a25), (uchar)(a24), \ + (uchar)(a23), (uchar)(a22), (uchar)(a21), (uchar)(a20), (uchar)(a19), (uchar)(a18), (uchar)(a17), (uchar)(a16), \ + (uchar)(a15), (uchar)(a14), (uchar)(a13), (uchar)(a12), (uchar)(a11), (uchar)(a10), (uchar)( a9), (uchar)( a8), \ + (uchar)( a7), (uchar)( a6), (uchar)( a5), (uchar)( a4), (uchar)( a3), (uchar)( a2), (uchar)( a1), (uchar)( a0)) + +#ifndef _mm512_cvtpd_pslo +#ifdef _mm512_zextsi256_si512 +#define _mm512_cvtpd_pslo(a) _mm512_zextps256_ps512(_mm512_cvtpd_ps(a)) +#else +//if preferred way to extend with zeros is unavailable +#define _mm512_cvtpd_pslo(a) _mm512_castps256_ps512(_mm512_cvtpd_ps(a)) +#endif +#endif +///////// Utils //////////// + +namespace +{ + +inline __m512i _v512_combine(const __m256i& lo, const __m256i& hi) +{ return _mm512_inserti32x8(_mm512_castsi256_si512(lo), hi, 1); } + +inline __m512 _v512_combine(const __m256& lo, const __m256& hi) +{ return _mm512_insertf32x8(_mm512_castps256_ps512(lo), hi, 1); } + +inline __m512d _v512_combine(const __m256d& lo, const __m256d& hi) +{ return _mm512_insertf64x4(_mm512_castpd256_pd512(lo), hi, 1); } + +inline int _v_cvtsi512_si32(const __m512i& a) +{ return _mm_cvtsi128_si32(_mm512_castsi512_si128(a)); } + +inline __m256i _v512_extract_high(const __m512i& v) +{ return _mm512_extracti32x8_epi32(v, 1); } + +inline __m256 _v512_extract_high(const __m512& v) +{ return _mm512_extractf32x8_ps(v, 1); } + +inline __m256d _v512_extract_high(const __m512d& v) +{ return _mm512_extractf64x4_pd(v, 1); } + +inline __m256i _v512_extract_low(const __m512i& v) +{ return _mm512_castsi512_si256(v); } + +inline __m256 _v512_extract_low(const __m512& v) +{ return _mm512_castps512_ps256(v); } + +inline __m256d _v512_extract_low(const __m512d& v) +{ return _mm512_castpd512_pd256(v); } + +inline __m512i _v512_insert(const __m512i& a, const __m256i& b) +{ return _mm512_inserti32x8(a, b, 0); } + +inline __m512 _v512_insert(const __m512& a, const __m256& b) +{ return _mm512_insertf32x8(a, b, 0); } + +inline __m512d _v512_insert(const __m512d& a, const __m256d& b) +{ return _mm512_insertf64x4(a, b, 0); } + +} + +namespace cv +{ + +//! @cond IGNORED + +CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN + +///////// Types //////////// + +struct v_uint8x64 +{ + typedef uchar lane_type; + enum { nlanes = 64 }; + __m512i val; + + explicit v_uint8x64(__m512i v) : val(v) {} + v_uint8x64(uchar v0, uchar v1, uchar v2, uchar v3, + uchar v4, uchar v5, uchar v6, uchar v7, + uchar v8, uchar v9, uchar v10, uchar v11, + uchar v12, uchar v13, uchar v14, uchar v15, + uchar v16, uchar v17, uchar v18, uchar v19, + uchar v20, uchar v21, uchar v22, uchar v23, + uchar v24, uchar v25, uchar v26, uchar v27, + uchar v28, uchar v29, uchar v30, uchar v31, + uchar v32, uchar v33, uchar v34, uchar v35, + uchar v36, uchar v37, uchar v38, uchar v39, + uchar v40, uchar v41, uchar v42, uchar v43, + uchar v44, uchar v45, uchar v46, uchar v47, + uchar v48, uchar v49, uchar v50, uchar v51, + uchar v52, uchar v53, uchar v54, uchar v55, + uchar v56, uchar v57, uchar v58, uchar v59, + uchar v60, uchar v61, uchar v62, uchar v63) + { + val = _v512_set_epu8(v63, v62, v61, v60, v59, v58, v57, v56, v55, v54, v53, v52, v51, v50, v49, v48, + v47, v46, v45, v44, v43, v42, v41, v40, v39, v38, v37, v36, v35, v34, v33, v32, + v31, v30, v29, v28, v27, v26, v25, v24, v23, v22, v21, v20, v19, v18, v17, v16, + v15, v14, v13, v12, v11, v10, v9, v8, v7, v6, v5, v4, v3, v2, v1, v0); + } + v_uint8x64() {} + + static inline v_uint8x64 zero() { return v_uint8x64(_mm512_setzero_si512()); } + + uchar get0() const { return (uchar)_v_cvtsi512_si32(val); } +}; + +struct v_int8x64 +{ + typedef schar lane_type; + enum { nlanes = 64 }; + __m512i val; + + explicit v_int8x64(__m512i v) : val(v) {} + v_int8x64(schar v0, schar v1, schar v2, schar v3, + schar v4, schar v5, schar v6, schar v7, + schar v8, schar v9, schar v10, schar v11, + schar v12, schar v13, schar v14, schar v15, + schar v16, schar v17, schar v18, schar v19, + schar v20, schar v21, schar v22, schar v23, + schar v24, schar v25, schar v26, schar v27, + schar v28, schar v29, schar v30, schar v31, + schar v32, schar v33, schar v34, schar v35, + schar v36, schar v37, schar v38, schar v39, + schar v40, schar v41, schar v42, schar v43, + schar v44, schar v45, schar v46, schar v47, + schar v48, schar v49, schar v50, schar v51, + schar v52, schar v53, schar v54, schar v55, + schar v56, schar v57, schar v58, schar v59, + schar v60, schar v61, schar v62, schar v63) + { + val = _v512_set_epi8(v63, v62, v61, v60, v59, v58, v57, v56, v55, v54, v53, v52, v51, v50, v49, v48, + v47, v46, v45, v44, v43, v42, v41, v40, v39, v38, v37, v36, v35, v34, v33, v32, + v31, v30, v29, v28, v27, v26, v25, v24, v23, v22, v21, v20, v19, v18, v17, v16, + v15, v14, v13, v12, v11, v10, v9, v8, v7, v6, v5, v4, v3, v2, v1, v0); + } + v_int8x64() {} + + static inline v_int8x64 zero() { return v_int8x64(_mm512_setzero_si512()); } + + schar get0() const { return (schar)_v_cvtsi512_si32(val); } +}; + +struct v_uint16x32 +{ + typedef ushort lane_type; + enum { nlanes = 32 }; + __m512i val; + + explicit v_uint16x32(__m512i v) : val(v) {} + v_uint16x32(ushort v0, ushort v1, ushort v2, ushort v3, + ushort v4, ushort v5, ushort v6, ushort v7, + ushort v8, ushort v9, ushort v10, ushort v11, + ushort v12, ushort v13, ushort v14, ushort v15, + ushort v16, ushort v17, ushort v18, ushort v19, + ushort v20, ushort v21, ushort v22, ushort v23, + ushort v24, ushort v25, ushort v26, ushort v27, + ushort v28, ushort v29, ushort v30, ushort v31) + { + val = _v512_set_epu16(v31, v30, v29, v28, v27, v26, v25, v24, v23, v22, v21, v20, v19, v18, v17, v16, + v15, v14, v13, v12, v11, v10, v9, v8, v7, v6, v5, v4, v3, v2, v1, v0); + } + v_uint16x32() {} + + static inline v_uint16x32 zero() { return v_uint16x32(_mm512_setzero_si512()); } + + ushort get0() const { return (ushort)_v_cvtsi512_si32(val); } +}; + +struct v_int16x32 +{ + typedef short lane_type; + enum { nlanes = 32 }; + __m512i val; + + explicit v_int16x32(__m512i v) : val(v) {} + v_int16x32(short v0, short v1, short v2, short v3, short v4, short v5, short v6, short v7, + short v8, short v9, short v10, short v11, short v12, short v13, short v14, short v15, + short v16, short v17, short v18, short v19, short v20, short v21, short v22, short v23, + short v24, short v25, short v26, short v27, short v28, short v29, short v30, short v31) + { + val = _v512_set_epu16((ushort)v31, (ushort)v30, (ushort)v29, (ushort)v28, (ushort)v27, (ushort)v26, (ushort)v25, (ushort)v24, + (ushort)v23, (ushort)v22, (ushort)v21, (ushort)v20, (ushort)v19, (ushort)v18, (ushort)v17, (ushort)v16, + (ushort)v15, (ushort)v14, (ushort)v13, (ushort)v12, (ushort)v11, (ushort)v10, (ushort)v9 , (ushort)v8, + (ushort)v7 , (ushort)v6 , (ushort)v5 , (ushort)v4 , (ushort)v3 , (ushort)v2 , (ushort)v1 , (ushort)v0); + } + v_int16x32() {} + + static inline v_int16x32 zero() { return v_int16x32(_mm512_setzero_si512()); } + + short get0() const { return (short)_v_cvtsi512_si32(val); } +}; + +struct v_uint32x16 +{ + typedef unsigned lane_type; + enum { nlanes = 16 }; + __m512i val; + + explicit v_uint32x16(__m512i v) : val(v) {} + v_uint32x16(unsigned v0, unsigned v1, unsigned v2, unsigned v3, + unsigned v4, unsigned v5, unsigned v6, unsigned v7, + unsigned v8, unsigned v9, unsigned v10, unsigned v11, + unsigned v12, unsigned v13, unsigned v14, unsigned v15) + { + val = _mm512_setr_epi32((int)v0, (int)v1, (int)v2, (int)v3, (int)v4, (int)v5, (int)v6, (int)v7, + (int)v8, (int)v9, (int)v10, (int)v11, (int)v12, (int)v13, (int)v14, (int)v15); + } + v_uint32x16() {} + + static inline v_uint32x16 zero() { return v_uint32x16(_mm512_setzero_si512()); } + + unsigned get0() const { return (unsigned)_v_cvtsi512_si32(val); } +}; + +struct v_int32x16 +{ + typedef int lane_type; + enum { nlanes = 16 }; + __m512i val; + + explicit v_int32x16(__m512i v) : val(v) {} + v_int32x16(int v0, int v1, int v2, int v3, int v4, int v5, int v6, int v7, + int v8, int v9, int v10, int v11, int v12, int v13, int v14, int v15) + { + val = _mm512_setr_epi32(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15); + } + v_int32x16() {} + + static inline v_int32x16 zero() { return v_int32x16(_mm512_setzero_si512()); } + + int get0() const { return _v_cvtsi512_si32(val); } +}; + +struct v_float32x16 +{ + typedef float lane_type; + enum { nlanes = 16 }; + __m512 val; + + explicit v_float32x16(__m512 v) : val(v) {} + v_float32x16(float v0, float v1, float v2, float v3, float v4, float v5, float v6, float v7, + float v8, float v9, float v10, float v11, float v12, float v13, float v14, float v15) + { + val = _mm512_setr_ps(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15); + } + v_float32x16() {} + + static inline v_float32x16 zero() { return v_float32x16(_mm512_setzero_ps()); } + + float get0() const { return _mm_cvtss_f32(_mm512_castps512_ps128(val)); } +}; + +struct v_uint64x8 +{ + typedef uint64 lane_type; + enum { nlanes = 8 }; + __m512i val; + + explicit v_uint64x8(__m512i v) : val(v) {} + v_uint64x8(uint64 v0, uint64 v1, uint64 v2, uint64 v3, uint64 v4, uint64 v5, uint64 v6, uint64 v7) + { val = _mm512_setr_epi64((int64)v0, (int64)v1, (int64)v2, (int64)v3, (int64)v4, (int64)v5, (int64)v6, (int64)v7); } + v_uint64x8() {} + + static inline v_uint64x8 zero() { return v_uint64x8(_mm512_setzero_si512()); } + + uint64 get0() const + { + #if defined __x86_64__ || defined _M_X64 + return (uint64)_mm_cvtsi128_si64(_mm512_castsi512_si128(val)); + #else + int a = _mm_cvtsi128_si32(_mm512_castsi512_si128(val)); + int b = _mm_cvtsi128_si32(_mm512_castsi512_si128(_mm512_srli_epi64(val, 32))); + return (unsigned)a | ((uint64)(unsigned)b << 32); + #endif + } +}; + +struct v_int64x8 +{ + typedef int64 lane_type; + enum { nlanes = 8 }; + __m512i val; + + explicit v_int64x8(__m512i v) : val(v) {} + v_int64x8(int64 v0, int64 v1, int64 v2, int64 v3, int64 v4, int64 v5, int64 v6, int64 v7) + { val = _mm512_setr_epi64(v0, v1, v2, v3, v4, v5, v6, v7); } + v_int64x8() {} + + static inline v_int64x8 zero() { return v_int64x8(_mm512_setzero_si512()); } + + int64 get0() const + { + #if defined __x86_64__ || defined _M_X64 + return (int64)_mm_cvtsi128_si64(_mm512_castsi512_si128(val)); + #else + int a = _mm_cvtsi128_si32(_mm512_castsi512_si128(val)); + int b = _mm_cvtsi128_si32(_mm512_castsi512_si128(_mm512_srli_epi64(val, 32))); + return (int64)((unsigned)a | ((uint64)(unsigned)b << 32)); + #endif + } +}; + +struct v_float64x8 +{ + typedef double lane_type; + enum { nlanes = 8 }; + __m512d val; + + explicit v_float64x8(__m512d v) : val(v) {} + v_float64x8(double v0, double v1, double v2, double v3, double v4, double v5, double v6, double v7) + { val = _mm512_setr_pd(v0, v1, v2, v3, v4, v5, v6, v7); } + v_float64x8() {} + + static inline v_float64x8 zero() { return v_float64x8(_mm512_setzero_pd()); } + + double get0() const { return _mm_cvtsd_f64(_mm512_castpd512_pd128(val)); } +}; + +//////////////// Load and store operations /////////////// + +#define OPENCV_HAL_IMPL_AVX512_LOADSTORE(_Tpvec, _Tp) \ + inline _Tpvec v512_load(const _Tp* ptr) \ + { return _Tpvec(_mm512_loadu_si512((const __m512i*)ptr)); } \ + inline _Tpvec v512_load_aligned(const _Tp* ptr) \ + { return _Tpvec(_mm512_load_si512((const __m512i*)ptr)); } \ + inline _Tpvec v512_load_low(const _Tp* ptr) \ + { \ + __m256i v256 = _mm256_loadu_si256((const __m256i*)ptr); \ + return _Tpvec(_mm512_castsi256_si512(v256)); \ + } \ + inline _Tpvec v512_load_halves(const _Tp* ptr0, const _Tp* ptr1) \ + { \ + __m256i vlo = _mm256_loadu_si256((const __m256i*)ptr0); \ + __m256i vhi = _mm256_loadu_si256((const __m256i*)ptr1); \ + return _Tpvec(_v512_combine(vlo, vhi)); \ + } \ + inline void v_store(_Tp* ptr, const _Tpvec& a) \ + { _mm512_storeu_si512((__m512i*)ptr, a.val); } \ + inline void v_store_aligned(_Tp* ptr, const _Tpvec& a) \ + { _mm512_store_si512((__m512i*)ptr, a.val); } \ + inline void v_store_aligned_nocache(_Tp* ptr, const _Tpvec& a) \ + { _mm512_stream_si512((__m512i*)ptr, a.val); } \ + inline void v_store(_Tp* ptr, const _Tpvec& a, hal::StoreMode mode) \ + { \ + if( mode == hal::STORE_UNALIGNED ) \ + _mm512_storeu_si512((__m512i*)ptr, a.val); \ + else if( mode == hal::STORE_ALIGNED_NOCACHE ) \ + _mm512_stream_si512((__m512i*)ptr, a.val); \ + else \ + _mm512_store_si512((__m512i*)ptr, a.val); \ + } \ + inline void v_store_low(_Tp* ptr, const _Tpvec& a) \ + { _mm256_storeu_si256((__m256i*)ptr, _v512_extract_low(a.val)); } \ + inline void v_store_high(_Tp* ptr, const _Tpvec& a) \ + { _mm256_storeu_si256((__m256i*)ptr, _v512_extract_high(a.val)); } + +OPENCV_HAL_IMPL_AVX512_LOADSTORE(v_uint8x64, uchar) +OPENCV_HAL_IMPL_AVX512_LOADSTORE(v_int8x64, schar) +OPENCV_HAL_IMPL_AVX512_LOADSTORE(v_uint16x32, ushort) +OPENCV_HAL_IMPL_AVX512_LOADSTORE(v_int16x32, short) +OPENCV_HAL_IMPL_AVX512_LOADSTORE(v_uint32x16, unsigned) +OPENCV_HAL_IMPL_AVX512_LOADSTORE(v_int32x16, int) +OPENCV_HAL_IMPL_AVX512_LOADSTORE(v_uint64x8, uint64) +OPENCV_HAL_IMPL_AVX512_LOADSTORE(v_int64x8, int64) + +#define OPENCV_HAL_IMPL_AVX512_LOADSTORE_FLT(_Tpvec, _Tp, suffix, halfreg) \ + inline _Tpvec v512_load(const _Tp* ptr) \ + { return _Tpvec(_mm512_loadu_##suffix(ptr)); } \ + inline _Tpvec v512_load_aligned(const _Tp* ptr) \ + { return _Tpvec(_mm512_load_##suffix(ptr)); } \ + inline _Tpvec v512_load_low(const _Tp* ptr) \ + { \ + return _Tpvec(_mm512_cast##suffix##256_##suffix##512 \ + (_mm256_loadu_##suffix(ptr))); \ + } \ + inline _Tpvec v512_load_halves(const _Tp* ptr0, const _Tp* ptr1) \ + { \ + halfreg vlo = _mm256_loadu_##suffix(ptr0); \ + halfreg vhi = _mm256_loadu_##suffix(ptr1); \ + return _Tpvec(_v512_combine(vlo, vhi)); \ + } \ + inline void v_store(_Tp* ptr, const _Tpvec& a) \ + { _mm512_storeu_##suffix(ptr, a.val); } \ + inline void v_store_aligned(_Tp* ptr, const _Tpvec& a) \ + { _mm512_store_##suffix(ptr, a.val); } \ + inline void v_store_aligned_nocache(_Tp* ptr, const _Tpvec& a) \ + { _mm512_stream_##suffix(ptr, a.val); } \ + inline void v_store(_Tp* ptr, const _Tpvec& a, hal::StoreMode mode) \ + { \ + if( mode == hal::STORE_UNALIGNED ) \ + _mm512_storeu_##suffix(ptr, a.val); \ + else if( mode == hal::STORE_ALIGNED_NOCACHE ) \ + _mm512_stream_##suffix(ptr, a.val); \ + else \ + _mm512_store_##suffix(ptr, a.val); \ + } \ + inline void v_store_low(_Tp* ptr, const _Tpvec& a) \ + { _mm256_storeu_##suffix(ptr, _v512_extract_low(a.val)); } \ + inline void v_store_high(_Tp* ptr, const _Tpvec& a) \ + { _mm256_storeu_##suffix(ptr, _v512_extract_high(a.val)); } + +OPENCV_HAL_IMPL_AVX512_LOADSTORE_FLT(v_float32x16, float, ps, __m256) +OPENCV_HAL_IMPL_AVX512_LOADSTORE_FLT(v_float64x8, double, pd, __m256d) + +#define OPENCV_HAL_IMPL_AVX512_CAST(_Tpvec, _Tpvecf, suffix, cast) \ + inline _Tpvec v_reinterpret_as_##suffix(const _Tpvecf& a) \ + { return _Tpvec(cast(a.val)); } + +#define OPENCV_HAL_IMPL_AVX512_INIT(_Tpvec, _Tp, suffix, ssuffix, ctype_s) \ + inline _Tpvec v512_setzero_##suffix() \ + { return _Tpvec(_mm512_setzero_si512()); } \ + inline _Tpvec v512_setall_##suffix(_Tp v) \ + { return _Tpvec(_mm512_set1_##ssuffix((ctype_s)v)); } \ + OPENCV_HAL_IMPL_AVX512_CAST(_Tpvec, v_uint8x64, suffix, OPENCV_HAL_NOP) \ + OPENCV_HAL_IMPL_AVX512_CAST(_Tpvec, v_int8x64, suffix, OPENCV_HAL_NOP) \ + OPENCV_HAL_IMPL_AVX512_CAST(_Tpvec, v_uint16x32, suffix, OPENCV_HAL_NOP) \ + OPENCV_HAL_IMPL_AVX512_CAST(_Tpvec, v_int16x32, suffix, OPENCV_HAL_NOP) \ + OPENCV_HAL_IMPL_AVX512_CAST(_Tpvec, v_uint32x16, suffix, OPENCV_HAL_NOP) \ + OPENCV_HAL_IMPL_AVX512_CAST(_Tpvec, v_int32x16, suffix, OPENCV_HAL_NOP) \ + OPENCV_HAL_IMPL_AVX512_CAST(_Tpvec, v_uint64x8, suffix, OPENCV_HAL_NOP) \ + OPENCV_HAL_IMPL_AVX512_CAST(_Tpvec, v_int64x8, suffix, OPENCV_HAL_NOP) \ + OPENCV_HAL_IMPL_AVX512_CAST(_Tpvec, v_float32x16, suffix, _mm512_castps_si512) \ + OPENCV_HAL_IMPL_AVX512_CAST(_Tpvec, v_float64x8, suffix, _mm512_castpd_si512) + +OPENCV_HAL_IMPL_AVX512_INIT(v_uint8x64, uchar, u8, epi8, char) +OPENCV_HAL_IMPL_AVX512_INIT(v_int8x64, schar, s8, epi8, char) +OPENCV_HAL_IMPL_AVX512_INIT(v_uint16x32, ushort, u16, epi16, short) +OPENCV_HAL_IMPL_AVX512_INIT(v_int16x32, short, s16, epi16, short) +OPENCV_HAL_IMPL_AVX512_INIT(v_uint32x16, unsigned, u32, epi32, int) +OPENCV_HAL_IMPL_AVX512_INIT(v_int32x16, int, s32, epi32, int) +OPENCV_HAL_IMPL_AVX512_INIT(v_uint64x8, uint64, u64, epi64, int64) +OPENCV_HAL_IMPL_AVX512_INIT(v_int64x8, int64, s64, epi64, int64) + +#define OPENCV_HAL_IMPL_AVX512_INIT_FLT(_Tpvec, _Tp, suffix, zsuffix, cast) \ + inline _Tpvec v512_setzero_##suffix() \ + { return _Tpvec(_mm512_setzero_##zsuffix()); } \ + inline _Tpvec v512_setall_##suffix(_Tp v) \ + { return _Tpvec(_mm512_set1_##zsuffix(v)); } \ + OPENCV_HAL_IMPL_AVX512_CAST(_Tpvec, v_uint8x64, suffix, cast) \ + OPENCV_HAL_IMPL_AVX512_CAST(_Tpvec, v_int8x64, suffix, cast) \ + OPENCV_HAL_IMPL_AVX512_CAST(_Tpvec, v_uint16x32, suffix, cast) \ + OPENCV_HAL_IMPL_AVX512_CAST(_Tpvec, v_int16x32, suffix, cast) \ + OPENCV_HAL_IMPL_AVX512_CAST(_Tpvec, v_uint32x16, suffix, cast) \ + OPENCV_HAL_IMPL_AVX512_CAST(_Tpvec, v_int32x16, suffix, cast) \ + OPENCV_HAL_IMPL_AVX512_CAST(_Tpvec, v_uint64x8, suffix, cast) \ + OPENCV_HAL_IMPL_AVX512_CAST(_Tpvec, v_int64x8, suffix, cast) + +OPENCV_HAL_IMPL_AVX512_INIT_FLT(v_float32x16, float, f32, ps, _mm512_castsi512_ps) +OPENCV_HAL_IMPL_AVX512_INIT_FLT(v_float64x8, double, f64, pd, _mm512_castsi512_pd) + +inline v_float32x16 v_reinterpret_as_f32(const v_float32x16& a) +{ return a; } +inline v_float32x16 v_reinterpret_as_f32(const v_float64x8& a) +{ return v_float32x16(_mm512_castpd_ps(a.val)); } + +inline v_float64x8 v_reinterpret_as_f64(const v_float64x8& a) +{ return a; } +inline v_float64x8 v_reinterpret_as_f64(const v_float32x16& a) +{ return v_float64x8(_mm512_castps_pd(a.val)); } + +// FP16 +inline v_float32x16 v512_load_expand(const float16_t* ptr) +{ + return v_float32x16(_mm512_cvtph_ps(_mm256_loadu_si256((const __m256i*)ptr))); +} + +inline void v_pack_store(float16_t* ptr, const v_float32x16& a) +{ + __m256i ah = _mm512_cvtps_ph(a.val, 0); + _mm256_storeu_si256((__m256i*)ptr, ah); +} + +/* Recombine & ZIP */ +inline void v_zip(const v_int8x64& a, const v_int8x64& b, v_int8x64& ab0, v_int8x64& ab1) +{ +#if CV_AVX_512VBMI + __m512i mask0 = _v512_set_epu8( 95, 31, 94, 30, 93, 29, 92, 28, 91, 27, 90, 26, 89, 25, 88, 24, + 87, 23, 86, 22, 85, 21, 84, 20, 83, 19, 82, 18, 81, 17, 80, 16, + 79, 15, 78, 14, 77, 13, 76, 12, 75, 11, 74, 10, 73, 9, 72, 8, + 71, 7, 70, 6, 69, 5, 68, 4, 67, 3, 66, 2, 65, 1, 64, 0); + ab0 = v_int8x64(_mm512_permutex2var_epi8(a.val, mask0, b.val)); + __m512i mask1 = _v512_set_epu8(127, 63, 126, 62, 125, 61, 124, 60, 123, 59, 122, 58, 121, 57, 120, 56, + 119, 55, 118, 54, 117, 53, 116, 52, 115, 51, 114, 50, 113, 49, 112, 48, + 111, 47, 110, 46, 109, 45, 108, 44, 107, 43, 106, 42, 105, 41, 104, 40, + 103, 39, 102, 38, 101, 37, 100, 36, 99, 35, 98, 34, 97, 33, 96, 32); + ab1 = v_int8x64(_mm512_permutex2var_epi8(a.val, mask1, b.val)); +#else + __m512i low = _mm512_unpacklo_epi8(a.val, b.val); + __m512i high = _mm512_unpackhi_epi8(a.val, b.val); + ab0 = v_int8x64(_mm512_permutex2var_epi64(low, _v512_set_epu64(11, 10, 3, 2, 9, 8, 1, 0), high)); + ab1 = v_int8x64(_mm512_permutex2var_epi64(low, _v512_set_epu64(15, 14, 7, 6, 13, 12, 5, 4), high)); +#endif +} +inline void v_zip(const v_int16x32& a, const v_int16x32& b, v_int16x32& ab0, v_int16x32& ab1) +{ + __m512i mask0 = _v512_set_epu16(47, 15, 46, 14, 45, 13, 44, 12, 43, 11, 42, 10, 41, 9, 40, 8, + 39, 7, 38, 6, 37, 5, 36, 4, 35, 3, 34, 2, 33, 1, 32, 0); + ab0 = v_int16x32(_mm512_permutex2var_epi16(a.val, mask0, b.val)); + __m512i mask1 = _v512_set_epu16(63, 31, 62, 30, 61, 29, 60, 28, 59, 27, 58, 26, 57, 25, 56, 24, + 55, 23, 54, 22, 53, 21, 52, 20, 51, 19, 50, 18, 49, 17, 48, 16); + ab1 = v_int16x32(_mm512_permutex2var_epi16(a.val, mask1, b.val)); +} +inline void v_zip(const v_int32x16& a, const v_int32x16& b, v_int32x16& ab0, v_int32x16& ab1) +{ + __m512i mask0 = _v512_set_epu32(23, 7, 22, 6, 21, 5, 20, 4, 19, 3, 18, 2, 17, 1, 16, 0); + ab0 = v_int32x16(_mm512_permutex2var_epi32(a.val, mask0, b.val)); + __m512i mask1 = _v512_set_epu32(31, 15, 30, 14, 29, 13, 28, 12, 27, 11, 26, 10, 25, 9, 24, 8); + ab1 = v_int32x16(_mm512_permutex2var_epi32(a.val, mask1, b.val)); +} +inline void v_zip(const v_int64x8& a, const v_int64x8& b, v_int64x8& ab0, v_int64x8& ab1) +{ + __m512i mask0 = _v512_set_epu64(11, 3, 10, 2, 9, 1, 8, 0); + ab0 = v_int64x8(_mm512_permutex2var_epi64(a.val, mask0, b.val)); + __m512i mask1 = _v512_set_epu64(15, 7, 14, 6, 13, 5, 12, 4); + ab1 = v_int64x8(_mm512_permutex2var_epi64(a.val, mask1, b.val)); +} + +inline void v_zip(const v_uint8x64& a, const v_uint8x64& b, v_uint8x64& ab0, v_uint8x64& ab1) +{ + v_int8x64 i0, i1; + v_zip(v_reinterpret_as_s8(a), v_reinterpret_as_s8(b), i0, i1); + ab0 = v_reinterpret_as_u8(i0); + ab1 = v_reinterpret_as_u8(i1); +} +inline void v_zip(const v_uint16x32& a, const v_uint16x32& b, v_uint16x32& ab0, v_uint16x32& ab1) +{ + v_int16x32 i0, i1; + v_zip(v_reinterpret_as_s16(a), v_reinterpret_as_s16(b), i0, i1); + ab0 = v_reinterpret_as_u16(i0); + ab1 = v_reinterpret_as_u16(i1); +} +inline void v_zip(const v_uint32x16& a, const v_uint32x16& b, v_uint32x16& ab0, v_uint32x16& ab1) +{ + v_int32x16 i0, i1; + v_zip(v_reinterpret_as_s32(a), v_reinterpret_as_s32(b), i0, i1); + ab0 = v_reinterpret_as_u32(i0); + ab1 = v_reinterpret_as_u32(i1); +} +inline void v_zip(const v_uint64x8& a, const v_uint64x8& b, v_uint64x8& ab0, v_uint64x8& ab1) +{ + v_int64x8 i0, i1; + v_zip(v_reinterpret_as_s64(a), v_reinterpret_as_s64(b), i0, i1); + ab0 = v_reinterpret_as_u64(i0); + ab1 = v_reinterpret_as_u64(i1); +} +inline void v_zip(const v_float32x16& a, const v_float32x16& b, v_float32x16& ab0, v_float32x16& ab1) +{ + v_int32x16 i0, i1; + v_zip(v_reinterpret_as_s32(a), v_reinterpret_as_s32(b), i0, i1); + ab0 = v_reinterpret_as_f32(i0); + ab1 = v_reinterpret_as_f32(i1); +} +inline void v_zip(const v_float64x8& a, const v_float64x8& b, v_float64x8& ab0, v_float64x8& ab1) +{ + v_int64x8 i0, i1; + v_zip(v_reinterpret_as_s64(a), v_reinterpret_as_s64(b), i0, i1); + ab0 = v_reinterpret_as_f64(i0); + ab1 = v_reinterpret_as_f64(i1); +} + +#define OPENCV_HAL_IMPL_AVX512_COMBINE(_Tpvec, suffix) \ + inline _Tpvec v_combine_low(const _Tpvec& a, const _Tpvec& b) \ + { return _Tpvec(_v512_combine(_v512_extract_low(a.val), _v512_extract_low(b.val))); } \ + inline _Tpvec v_combine_high(const _Tpvec& a, const _Tpvec& b) \ + { return _Tpvec(_v512_insert(b.val, _v512_extract_high(a.val))); } \ + inline void v_recombine(const _Tpvec& a, const _Tpvec& b, \ + _Tpvec& c, _Tpvec& d) \ + { \ + c.val = _v512_combine(_v512_extract_low(a.val),_v512_extract_low(b.val)); \ + d.val = _v512_insert(b.val,_v512_extract_high(a.val)); \ + } + + +OPENCV_HAL_IMPL_AVX512_COMBINE(v_uint8x64, epi8) +OPENCV_HAL_IMPL_AVX512_COMBINE(v_int8x64, epi8) +OPENCV_HAL_IMPL_AVX512_COMBINE(v_uint16x32, epi16) +OPENCV_HAL_IMPL_AVX512_COMBINE(v_int16x32, epi16) +OPENCV_HAL_IMPL_AVX512_COMBINE(v_uint32x16, epi32) +OPENCV_HAL_IMPL_AVX512_COMBINE(v_int32x16, epi32) +OPENCV_HAL_IMPL_AVX512_COMBINE(v_uint64x8, epi64) +OPENCV_HAL_IMPL_AVX512_COMBINE(v_int64x8, epi64) +OPENCV_HAL_IMPL_AVX512_COMBINE(v_float32x16, ps) +OPENCV_HAL_IMPL_AVX512_COMBINE(v_float64x8, pd) + +////////// Arithmetic, bitwise and comparison operations ///////// + +/* Element-wise binary and unary operations */ + +/** Non-saturating arithmetics **/ +#define OPENCV_HAL_IMPL_AVX512_BIN_FUNC(func, _Tpvec, intrin) \ + inline _Tpvec func(const _Tpvec& a, const _Tpvec& b) \ + { return _Tpvec(intrin(a.val, b.val)); } + +OPENCV_HAL_IMPL_AVX512_BIN_FUNC(v_add_wrap, v_uint8x64, _mm512_add_epi8) +OPENCV_HAL_IMPL_AVX512_BIN_FUNC(v_add_wrap, v_int8x64, _mm512_add_epi8) +OPENCV_HAL_IMPL_AVX512_BIN_FUNC(v_add_wrap, v_uint16x32, _mm512_add_epi16) +OPENCV_HAL_IMPL_AVX512_BIN_FUNC(v_add_wrap, v_int16x32, _mm512_add_epi16) +OPENCV_HAL_IMPL_AVX512_BIN_FUNC(v_sub_wrap, v_uint8x64, _mm512_sub_epi8) +OPENCV_HAL_IMPL_AVX512_BIN_FUNC(v_sub_wrap, v_int8x64, _mm512_sub_epi8) +OPENCV_HAL_IMPL_AVX512_BIN_FUNC(v_sub_wrap, v_uint16x32, _mm512_sub_epi16) +OPENCV_HAL_IMPL_AVX512_BIN_FUNC(v_sub_wrap, v_int16x32, _mm512_sub_epi16) +OPENCV_HAL_IMPL_AVX512_BIN_FUNC(v_mul_wrap, v_uint16x32, _mm512_mullo_epi16) +OPENCV_HAL_IMPL_AVX512_BIN_FUNC(v_mul_wrap, v_int16x32, _mm512_mullo_epi16) + +inline v_uint8x64 v_mul_wrap(const v_uint8x64& a, const v_uint8x64& b) +{ + __m512i ad = _mm512_srai_epi16(a.val, 8); + __m512i bd = _mm512_srai_epi16(b.val, 8); + __m512i p0 = _mm512_mullo_epi16(a.val, b.val); // even + __m512i p1 = _mm512_slli_epi16(_mm512_mullo_epi16(ad, bd), 8); // odd + return v_uint8x64(_mm512_mask_blend_epi8(0xAAAAAAAAAAAAAAAA, p0, p1)); +} +inline v_int8x64 v_mul_wrap(const v_int8x64& a, const v_int8x64& b) +{ + return v_reinterpret_as_s8(v_mul_wrap(v_reinterpret_as_u8(a), v_reinterpret_as_u8(b))); +} + +#define OPENCV_HAL_IMPL_AVX512_BIN_OP(bin_op, _Tpvec, intrin) \ + inline _Tpvec operator bin_op (const _Tpvec& a, const _Tpvec& b) \ + { return _Tpvec(intrin(a.val, b.val)); } \ + inline _Tpvec& operator bin_op##= (_Tpvec& a, const _Tpvec& b) \ + { a.val = intrin(a.val, b.val); return a; } + +OPENCV_HAL_IMPL_AVX512_BIN_OP(+, v_uint32x16, _mm512_add_epi32) +OPENCV_HAL_IMPL_AVX512_BIN_OP(-, v_uint32x16, _mm512_sub_epi32) +OPENCV_HAL_IMPL_AVX512_BIN_OP(+, v_int32x16, _mm512_add_epi32) +OPENCV_HAL_IMPL_AVX512_BIN_OP(-, v_int32x16, _mm512_sub_epi32) +OPENCV_HAL_IMPL_AVX512_BIN_OP(+, v_uint64x8, _mm512_add_epi64) +OPENCV_HAL_IMPL_AVX512_BIN_OP(-, v_uint64x8, _mm512_sub_epi64) +OPENCV_HAL_IMPL_AVX512_BIN_OP(+, v_int64x8, _mm512_add_epi64) +OPENCV_HAL_IMPL_AVX512_BIN_OP(-, v_int64x8, _mm512_sub_epi64) + +OPENCV_HAL_IMPL_AVX512_BIN_OP(*, v_uint32x16, _mm512_mullo_epi32) +OPENCV_HAL_IMPL_AVX512_BIN_OP(*, v_int32x16, _mm512_mullo_epi32) +OPENCV_HAL_IMPL_AVX512_BIN_OP(*, v_uint64x8, _mm512_mullo_epi64) +OPENCV_HAL_IMPL_AVX512_BIN_OP(*, v_int64x8, _mm512_mullo_epi64) + +/** Saturating arithmetics **/ +OPENCV_HAL_IMPL_AVX512_BIN_OP(+, v_uint8x64, _mm512_adds_epu8) +OPENCV_HAL_IMPL_AVX512_BIN_OP(-, v_uint8x64, _mm512_subs_epu8) +OPENCV_HAL_IMPL_AVX512_BIN_OP(+, v_int8x64, _mm512_adds_epi8) +OPENCV_HAL_IMPL_AVX512_BIN_OP(-, v_int8x64, _mm512_subs_epi8) +OPENCV_HAL_IMPL_AVX512_BIN_OP(+, v_uint16x32, _mm512_adds_epu16) +OPENCV_HAL_IMPL_AVX512_BIN_OP(-, v_uint16x32, _mm512_subs_epu16) +OPENCV_HAL_IMPL_AVX512_BIN_OP(+, v_int16x32, _mm512_adds_epi16) +OPENCV_HAL_IMPL_AVX512_BIN_OP(-, v_int16x32, _mm512_subs_epi16) + +OPENCV_HAL_IMPL_AVX512_BIN_OP(+, v_float32x16, _mm512_add_ps) +OPENCV_HAL_IMPL_AVX512_BIN_OP(-, v_float32x16, _mm512_sub_ps) +OPENCV_HAL_IMPL_AVX512_BIN_OP(*, v_float32x16, _mm512_mul_ps) +OPENCV_HAL_IMPL_AVX512_BIN_OP(/, v_float32x16, _mm512_div_ps) +OPENCV_HAL_IMPL_AVX512_BIN_OP(+, v_float64x8, _mm512_add_pd) +OPENCV_HAL_IMPL_AVX512_BIN_OP(-, v_float64x8, _mm512_sub_pd) +OPENCV_HAL_IMPL_AVX512_BIN_OP(*, v_float64x8, _mm512_mul_pd) +OPENCV_HAL_IMPL_AVX512_BIN_OP(/, v_float64x8, _mm512_div_pd) + +// saturating multiply +inline v_uint8x64 operator * (const v_uint8x64& a, const v_uint8x64& b) +{ + v_uint16x32 c, d; + v_mul_expand(a, b, c, d); + return v_pack(c, d); +} +inline v_int8x64 operator * (const v_int8x64& a, const v_int8x64& b) +{ + v_int16x32 c, d; + v_mul_expand(a, b, c, d); + return v_pack(c, d); +} +inline v_uint16x32 operator * (const v_uint16x32& a, const v_uint16x32& b) +{ + __m512i pl = _mm512_mullo_epi16(a.val, b.val); + __m512i ph = _mm512_mulhi_epu16(a.val, b.val); + __m512i p0 = _mm512_unpacklo_epi16(pl, ph); + __m512i p1 = _mm512_unpackhi_epi16(pl, ph); + + const __m512i m = _mm512_set1_epi32(65535); + return v_uint16x32(_mm512_packus_epi32(_mm512_min_epu32(p0, m), _mm512_min_epu32(p1, m))); +} +inline v_int16x32 operator * (const v_int16x32& a, const v_int16x32& b) +{ + __m512i pl = _mm512_mullo_epi16(a.val, b.val); + __m512i ph = _mm512_mulhi_epi16(a.val, b.val); + __m512i p0 = _mm512_unpacklo_epi16(pl, ph); + __m512i p1 = _mm512_unpackhi_epi16(pl, ph); + return v_int16x32(_mm512_packs_epi32(p0, p1)); +} + +inline v_uint8x64& operator *= (v_uint8x64& a, const v_uint8x64& b) +{ a = a * b; return a; } +inline v_int8x64& operator *= (v_int8x64& a, const v_int8x64& b) +{ a = a * b; return a; } +inline v_uint16x32& operator *= (v_uint16x32& a, const v_uint16x32& b) +{ a = a * b; return a; } +inline v_int16x32& operator *= (v_int16x32& a, const v_int16x32& b) +{ a = a * b; return a; } + +inline v_int16x32 v_mul_hi(const v_int16x32& a, const v_int16x32& b) { return v_int16x32(_mm512_mulhi_epi16(a.val, b.val)); } +inline v_uint16x32 v_mul_hi(const v_uint16x32& a, const v_uint16x32& b) { return v_uint16x32(_mm512_mulhi_epu16(a.val, b.val)); } + +// Multiply and expand +inline void v_mul_expand(const v_uint8x64& a, const v_uint8x64& b, + v_uint16x32& c, v_uint16x32& d) +{ + v_uint16x32 a0, a1, b0, b1; + v_expand(a, a0, a1); + v_expand(b, b0, b1); + c = v_mul_wrap(a0, b0); + d = v_mul_wrap(a1, b1); +} + +inline void v_mul_expand(const v_int8x64& a, const v_int8x64& b, + v_int16x32& c, v_int16x32& d) +{ + v_int16x32 a0, a1, b0, b1; + v_expand(a, a0, a1); + v_expand(b, b0, b1); + c = v_mul_wrap(a0, b0); + d = v_mul_wrap(a1, b1); +} + +inline void v_mul_expand(const v_int16x32& a, const v_int16x32& b, + v_int32x16& c, v_int32x16& d) +{ + v_int16x32 v0, v1; + v_zip(v_mul_wrap(a, b), v_mul_hi(a, b), v0, v1); + + c = v_reinterpret_as_s32(v0); + d = v_reinterpret_as_s32(v1); +} + +inline void v_mul_expand(const v_uint16x32& a, const v_uint16x32& b, + v_uint32x16& c, v_uint32x16& d) +{ + v_uint16x32 v0, v1; + v_zip(v_mul_wrap(a, b), v_mul_hi(a, b), v0, v1); + + c = v_reinterpret_as_u32(v0); + d = v_reinterpret_as_u32(v1); +} + +inline void v_mul_expand(const v_uint32x16& a, const v_uint32x16& b, + v_uint64x8& c, v_uint64x8& d) +{ + v_zip(v_uint64x8(_mm512_mul_epu32(a.val, b.val)), + v_uint64x8(_mm512_mul_epu32(_mm512_srli_epi64(a.val, 32), _mm512_srli_epi64(b.val, 32))), c, d); +} + +inline void v_mul_expand(const v_int32x16& a, const v_int32x16& b, + v_int64x8& c, v_int64x8& d) +{ + v_zip(v_int64x8(_mm512_mul_epi32(a.val, b.val)), + v_int64x8(_mm512_mul_epi32(_mm512_srli_epi64(a.val, 32), _mm512_srli_epi64(b.val, 32))), c, d); +} + +/** Bitwise shifts **/ +#define OPENCV_HAL_IMPL_AVX512_SHIFT_OP(_Tpuvec, _Tpsvec, suffix) \ + inline _Tpuvec operator << (const _Tpuvec& a, int imm) \ + { return _Tpuvec(_mm512_slli_##suffix(a.val, imm)); } \ + inline _Tpsvec operator << (const _Tpsvec& a, int imm) \ + { return _Tpsvec(_mm512_slli_##suffix(a.val, imm)); } \ + inline _Tpuvec operator >> (const _Tpuvec& a, int imm) \ + { return _Tpuvec(_mm512_srli_##suffix(a.val, imm)); } \ + inline _Tpsvec operator >> (const _Tpsvec& a, int imm) \ + { return _Tpsvec(_mm512_srai_##suffix(a.val, imm)); } \ + template \ + inline _Tpuvec v_shl(const _Tpuvec& a) \ + { return _Tpuvec(_mm512_slli_##suffix(a.val, imm)); } \ + template \ + inline _Tpsvec v_shl(const _Tpsvec& a) \ + { return _Tpsvec(_mm512_slli_##suffix(a.val, imm)); } \ + template \ + inline _Tpuvec v_shr(const _Tpuvec& a) \ + { return _Tpuvec(_mm512_srli_##suffix(a.val, imm)); } \ + template \ + inline _Tpsvec v_shr(const _Tpsvec& a) \ + { return _Tpsvec(_mm512_srai_##suffix(a.val, imm)); } + +OPENCV_HAL_IMPL_AVX512_SHIFT_OP(v_uint16x32, v_int16x32, epi16) +OPENCV_HAL_IMPL_AVX512_SHIFT_OP(v_uint32x16, v_int32x16, epi32) +OPENCV_HAL_IMPL_AVX512_SHIFT_OP(v_uint64x8, v_int64x8, epi64) + + +/** Bitwise logic **/ +#define OPENCV_HAL_IMPL_AVX512_LOGIC_OP(_Tpvec, suffix, not_const) \ + OPENCV_HAL_IMPL_AVX512_BIN_OP(&, _Tpvec, _mm512_and_##suffix) \ + OPENCV_HAL_IMPL_AVX512_BIN_OP(|, _Tpvec, _mm512_or_##suffix) \ + OPENCV_HAL_IMPL_AVX512_BIN_OP(^, _Tpvec, _mm512_xor_##suffix) \ + inline _Tpvec operator ~ (const _Tpvec& a) \ + { return _Tpvec(_mm512_xor_##suffix(a.val, not_const)); } + +OPENCV_HAL_IMPL_AVX512_LOGIC_OP(v_uint8x64, si512, _mm512_set1_epi32(-1)) +OPENCV_HAL_IMPL_AVX512_LOGIC_OP(v_int8x64, si512, _mm512_set1_epi32(-1)) +OPENCV_HAL_IMPL_AVX512_LOGIC_OP(v_uint16x32, si512, _mm512_set1_epi32(-1)) +OPENCV_HAL_IMPL_AVX512_LOGIC_OP(v_int16x32, si512, _mm512_set1_epi32(-1)) +OPENCV_HAL_IMPL_AVX512_LOGIC_OP(v_uint32x16, si512, _mm512_set1_epi32(-1)) +OPENCV_HAL_IMPL_AVX512_LOGIC_OP(v_int32x16, si512, _mm512_set1_epi32(-1)) +OPENCV_HAL_IMPL_AVX512_LOGIC_OP(v_uint64x8, si512, _mm512_set1_epi64(-1)) +OPENCV_HAL_IMPL_AVX512_LOGIC_OP(v_int64x8, si512, _mm512_set1_epi64(-1)) +OPENCV_HAL_IMPL_AVX512_LOGIC_OP(v_float32x16, ps, _mm512_castsi512_ps(_mm512_set1_epi32(-1))) +OPENCV_HAL_IMPL_AVX512_LOGIC_OP(v_float64x8, pd, _mm512_castsi512_pd(_mm512_set1_epi32(-1))) + +/** Select **/ +#define OPENCV_HAL_IMPL_AVX512_SELECT(_Tpvec, suffix, zsuf) \ + inline _Tpvec v_select(const _Tpvec& mask, const _Tpvec& a, const _Tpvec& b) \ + { return _Tpvec(_mm512_mask_blend_##suffix(_mm512_cmp_##suffix##_mask(mask.val, _mm512_setzero_##zsuf(), _MM_CMPINT_EQ), a.val, b.val)); } + +OPENCV_HAL_IMPL_AVX512_SELECT(v_uint8x64, epi8, si512) +OPENCV_HAL_IMPL_AVX512_SELECT(v_int8x64, epi8, si512) +OPENCV_HAL_IMPL_AVX512_SELECT(v_uint16x32, epi16, si512) +OPENCV_HAL_IMPL_AVX512_SELECT(v_int16x32, epi16, si512) +OPENCV_HAL_IMPL_AVX512_SELECT(v_uint32x16, epi32, si512) +OPENCV_HAL_IMPL_AVX512_SELECT(v_int32x16, epi32, si512) +OPENCV_HAL_IMPL_AVX512_SELECT(v_uint64x8, epi64, si512) +OPENCV_HAL_IMPL_AVX512_SELECT(v_int64x8, epi64, si512) +OPENCV_HAL_IMPL_AVX512_SELECT(v_float32x16, ps, ps) +OPENCV_HAL_IMPL_AVX512_SELECT(v_float64x8, pd, pd) + +/** Comparison **/ +#define OPENCV_HAL_IMPL_AVX512_CMP_INT(bin_op, imm8, _Tpvec, sufcmp, sufset, tval) \ + inline _Tpvec operator bin_op (const _Tpvec& a, const _Tpvec& b) \ + { return _Tpvec(_mm512_maskz_set1_##sufset(_mm512_cmp_##sufcmp##_mask(a.val, b.val, imm8), tval)); } + +#define OPENCV_HAL_IMPL_AVX512_CMP_OP_INT(_Tpvec, sufcmp, sufset, tval) \ + OPENCV_HAL_IMPL_AVX512_CMP_INT(==, _MM_CMPINT_EQ, _Tpvec, sufcmp, sufset, tval) \ + OPENCV_HAL_IMPL_AVX512_CMP_INT(!=, _MM_CMPINT_NE, _Tpvec, sufcmp, sufset, tval) \ + OPENCV_HAL_IMPL_AVX512_CMP_INT(<, _MM_CMPINT_LT, _Tpvec, sufcmp, sufset, tval) \ + OPENCV_HAL_IMPL_AVX512_CMP_INT(>, _MM_CMPINT_NLE, _Tpvec, sufcmp, sufset, tval) \ + OPENCV_HAL_IMPL_AVX512_CMP_INT(<=, _MM_CMPINT_LE, _Tpvec, sufcmp, sufset, tval) \ + OPENCV_HAL_IMPL_AVX512_CMP_INT(>=, _MM_CMPINT_NLT, _Tpvec, sufcmp, sufset, tval) + +OPENCV_HAL_IMPL_AVX512_CMP_OP_INT(v_uint8x64, epu8, epi8, (char)-1) +OPENCV_HAL_IMPL_AVX512_CMP_OP_INT(v_int8x64, epi8, epi8, (char)-1) +OPENCV_HAL_IMPL_AVX512_CMP_OP_INT(v_uint16x32, epu16, epi16, (short)-1) +OPENCV_HAL_IMPL_AVX512_CMP_OP_INT(v_int16x32, epi16, epi16, (short)-1) +OPENCV_HAL_IMPL_AVX512_CMP_OP_INT(v_uint32x16, epu32, epi32, (int)-1) +OPENCV_HAL_IMPL_AVX512_CMP_OP_INT(v_int32x16, epi32, epi32, (int)-1) +OPENCV_HAL_IMPL_AVX512_CMP_OP_INT(v_uint64x8, epu64, epi64, (int64)-1) +OPENCV_HAL_IMPL_AVX512_CMP_OP_INT(v_int64x8, epi64, epi64, (int64)-1) + +#define OPENCV_HAL_IMPL_AVX512_CMP_FLT(bin_op, imm8, _Tpvec, sufcmp, sufset, tval) \ + inline _Tpvec operator bin_op (const _Tpvec& a, const _Tpvec& b) \ + { return _Tpvec(_mm512_castsi512_##sufcmp(_mm512_maskz_set1_##sufset(_mm512_cmp_##sufcmp##_mask(a.val, b.val, imm8), tval))); } + +#define OPENCV_HAL_IMPL_AVX512_CMP_OP_FLT(_Tpvec, sufcmp, sufset, tval) \ + OPENCV_HAL_IMPL_AVX512_CMP_FLT(==, _CMP_EQ_OQ, _Tpvec, sufcmp, sufset, tval) \ + OPENCV_HAL_IMPL_AVX512_CMP_FLT(!=, _CMP_NEQ_OQ, _Tpvec, sufcmp, sufset, tval) \ + OPENCV_HAL_IMPL_AVX512_CMP_FLT(<, _CMP_LT_OQ, _Tpvec, sufcmp, sufset, tval) \ + OPENCV_HAL_IMPL_AVX512_CMP_FLT(>, _CMP_GT_OQ, _Tpvec, sufcmp, sufset, tval) \ + OPENCV_HAL_IMPL_AVX512_CMP_FLT(<=, _CMP_LE_OQ, _Tpvec, sufcmp, sufset, tval) \ + OPENCV_HAL_IMPL_AVX512_CMP_FLT(>=, _CMP_GE_OQ, _Tpvec, sufcmp, sufset, tval) + +OPENCV_HAL_IMPL_AVX512_CMP_OP_FLT(v_float32x16, ps, epi32, (int)-1) +OPENCV_HAL_IMPL_AVX512_CMP_OP_FLT(v_float64x8, pd, epi64, (int64)-1) + +inline v_float32x16 v_not_nan(const v_float32x16& a) +{ return v_float32x16(_mm512_castsi512_ps(_mm512_maskz_set1_epi32(_mm512_cmp_ps_mask(a.val, a.val, _CMP_ORD_Q), (int)-1))); } +inline v_float64x8 v_not_nan(const v_float64x8& a) +{ return v_float64x8(_mm512_castsi512_pd(_mm512_maskz_set1_epi64(_mm512_cmp_pd_mask(a.val, a.val, _CMP_ORD_Q), (int64)-1))); } + +/** min/max **/ +OPENCV_HAL_IMPL_AVX512_BIN_FUNC(v_min, v_uint8x64, _mm512_min_epu8) +OPENCV_HAL_IMPL_AVX512_BIN_FUNC(v_max, v_uint8x64, _mm512_max_epu8) +OPENCV_HAL_IMPL_AVX512_BIN_FUNC(v_min, v_int8x64, _mm512_min_epi8) +OPENCV_HAL_IMPL_AVX512_BIN_FUNC(v_max, v_int8x64, _mm512_max_epi8) +OPENCV_HAL_IMPL_AVX512_BIN_FUNC(v_min, v_uint16x32, _mm512_min_epu16) +OPENCV_HAL_IMPL_AVX512_BIN_FUNC(v_max, v_uint16x32, _mm512_max_epu16) +OPENCV_HAL_IMPL_AVX512_BIN_FUNC(v_min, v_int16x32, _mm512_min_epi16) +OPENCV_HAL_IMPL_AVX512_BIN_FUNC(v_max, v_int16x32, _mm512_max_epi16) +OPENCV_HAL_IMPL_AVX512_BIN_FUNC(v_min, v_uint32x16, _mm512_min_epu32) +OPENCV_HAL_IMPL_AVX512_BIN_FUNC(v_max, v_uint32x16, _mm512_max_epu32) +OPENCV_HAL_IMPL_AVX512_BIN_FUNC(v_min, v_int32x16, _mm512_min_epi32) +OPENCV_HAL_IMPL_AVX512_BIN_FUNC(v_max, v_int32x16, _mm512_max_epi32) +OPENCV_HAL_IMPL_AVX512_BIN_FUNC(v_min, v_uint64x8, _mm512_min_epu64) +OPENCV_HAL_IMPL_AVX512_BIN_FUNC(v_max, v_uint64x8, _mm512_max_epu64) +OPENCV_HAL_IMPL_AVX512_BIN_FUNC(v_min, v_int64x8, _mm512_min_epi64) +OPENCV_HAL_IMPL_AVX512_BIN_FUNC(v_max, v_int64x8, _mm512_max_epi64) +OPENCV_HAL_IMPL_AVX512_BIN_FUNC(v_min, v_float32x16, _mm512_min_ps) +OPENCV_HAL_IMPL_AVX512_BIN_FUNC(v_max, v_float32x16, _mm512_max_ps) +OPENCV_HAL_IMPL_AVX512_BIN_FUNC(v_min, v_float64x8, _mm512_min_pd) +OPENCV_HAL_IMPL_AVX512_BIN_FUNC(v_max, v_float64x8, _mm512_max_pd) + +/** Rotate **/ +namespace { + template + struct _v_rotate_right { static inline v_int8x64 eval(const v_int8x64&, const v_int8x64&) { return v_int8x64(); }}; + template + struct _v_rotate_right { static inline v_int8x64 eval(const v_int8x64& a, const v_int8x64& b) + { + return v_int8x64(_mm512_or_si512(_mm512_srli_epi32(_mm512_alignr_epi32(b.val, a.val, imm32 ), imm4 *8), + _mm512_slli_epi32(_mm512_alignr_epi32(b.val, a.val, imm32 + 1), (4-imm4)*8))); + }}; + template + struct _v_rotate_right { static inline v_int8x64 eval(const v_int8x64& a, const v_int8x64& b) + { + return v_int8x64(_mm512_or_si512(_mm512_srli_epi32(_mm512_alignr_epi32(b.val, a.val, 15), imm4 *8), + _mm512_slli_epi32( b.val, (4-imm4)*8))); + }}; + template + struct _v_rotate_right { static inline v_int8x64 eval(const v_int8x64&, const v_int8x64& b) + { + return v_int8x64(_mm512_or_si512(_mm512_srli_epi32(_mm512_alignr_epi32(_mm512_setzero_si512(), b.val, imm32 - 16), imm4 *8), + _mm512_slli_epi32(_mm512_alignr_epi32(_mm512_setzero_si512(), b.val, imm32 - 15), (4-imm4)*8))); + }}; + template + struct _v_rotate_right { static inline v_int8x64 eval(const v_int8x64&, const v_int8x64& b) + { return v_int8x64(_mm512_srli_epi32(_mm512_alignr_epi32(_mm512_setzero_si512(), b.val, 15), imm4*8)); }}; + template + struct _v_rotate_right { static inline v_int8x64 eval(const v_int8x64& a, const v_int8x64& b) + { return v_int8x64(_mm512_alignr_epi32(b.val, a.val, imm32)); }}; + template<> + struct _v_rotate_right { static inline v_int8x64 eval(const v_int8x64& a, const v_int8x64&) { return a; }}; + template + struct _v_rotate_right { static inline v_int8x64 eval(const v_int8x64&, const v_int8x64& b) + { return v_int8x64(_mm512_alignr_epi32(_mm512_setzero_si512(), b.val, imm32 - 16)); }}; + template<> + struct _v_rotate_right { static inline v_int8x64 eval(const v_int8x64&, const v_int8x64& b) { return b; }}; + template<> + struct _v_rotate_right { static inline v_int8x64 eval(const v_int8x64&, const v_int8x64&) { return v_int8x64(); }}; +} +template inline v_int8x64 v_rotate_right(const v_int8x64& a, const v_int8x64& b) +{ + return imm >= 128 ? v_int8x64() : +#if CV_AVX_512VBMI + v_int8x64(_mm512_permutex2var_epi8(a.val, + _v512_set_epu8(0x3f + imm, 0x3e + imm, 0x3d + imm, 0x3c + imm, 0x3b + imm, 0x3a + imm, 0x39 + imm, 0x38 + imm, + 0x37 + imm, 0x36 + imm, 0x35 + imm, 0x34 + imm, 0x33 + imm, 0x32 + imm, 0x31 + imm, 0x30 + imm, + 0x2f + imm, 0x2e + imm, 0x2d + imm, 0x2c + imm, 0x2b + imm, 0x2a + imm, 0x29 + imm, 0x28 + imm, + 0x27 + imm, 0x26 + imm, 0x25 + imm, 0x24 + imm, 0x23 + imm, 0x22 + imm, 0x21 + imm, 0x20 + imm, + 0x1f + imm, 0x1e + imm, 0x1d + imm, 0x1c + imm, 0x1b + imm, 0x1a + imm, 0x19 + imm, 0x18 + imm, + 0x17 + imm, 0x16 + imm, 0x15 + imm, 0x14 + imm, 0x13 + imm, 0x12 + imm, 0x11 + imm, 0x10 + imm, + 0x0f + imm, 0x0e + imm, 0x0d + imm, 0x0c + imm, 0x0b + imm, 0x0a + imm, 0x09 + imm, 0x08 + imm, + 0x07 + imm, 0x06 + imm, 0x05 + imm, 0x04 + imm, 0x03 + imm, 0x02 + imm, 0x01 + imm, 0x00 + imm), b.val)); +#else + _v_rotate_right 15), imm/4>::eval(a, b); +#endif +} +template +inline v_int8x64 v_rotate_left(const v_int8x64& a, const v_int8x64& b) +{ + if (imm == 0) return a; + if (imm == 64) return b; + if (imm >= 128) return v_int8x64(); +#if CV_AVX_512VBMI + return v_int8x64(_mm512_permutex2var_epi8(b.val, + _v512_set_epi8(0x7f - imm,0x7e - imm,0x7d - imm,0x7c - imm,0x7b - imm,0x7a - imm,0x79 - imm,0x78 - imm, + 0x77 - imm,0x76 - imm,0x75 - imm,0x74 - imm,0x73 - imm,0x72 - imm,0x71 - imm,0x70 - imm, + 0x6f - imm,0x6e - imm,0x6d - imm,0x6c - imm,0x6b - imm,0x6a - imm,0x69 - imm,0x68 - imm, + 0x67 - imm,0x66 - imm,0x65 - imm,0x64 - imm,0x63 - imm,0x62 - imm,0x61 - imm,0x60 - imm, + 0x5f - imm,0x5e - imm,0x5d - imm,0x5c - imm,0x5b - imm,0x5a - imm,0x59 - imm,0x58 - imm, + 0x57 - imm,0x56 - imm,0x55 - imm,0x54 - imm,0x53 - imm,0x52 - imm,0x51 - imm,0x50 - imm, + 0x4f - imm,0x4e - imm,0x4d - imm,0x4c - imm,0x4b - imm,0x4a - imm,0x49 - imm,0x48 - imm, + 0x47 - imm,0x46 - imm,0x45 - imm,0x44 - imm,0x43 - imm,0x42 - imm,0x41 - imm,0x40 - imm), a.val)); +#else + return imm < 64 ? v_rotate_right<64 - imm>(b, a) : v_rotate_right<128 - imm>(v512_setzero_s8(), b); +#endif +} +template +inline v_int8x64 v_rotate_right(const v_int8x64& a) +{ + if (imm == 0) return a; + if (imm >= 64) return v_int8x64(); +#if CV_AVX_512VBMI + return v_int8x64(_mm512_maskz_permutexvar_epi8(0xFFFFFFFFFFFFFFFF >> imm, + _v512_set_epu8(0x3f + imm,0x3e + imm,0x3d + imm,0x3c + imm,0x3b + imm,0x3a + imm,0x39 + imm,0x38 + imm, + 0x37 + imm,0x36 + imm,0x35 + imm,0x34 + imm,0x33 + imm,0x32 + imm,0x31 + imm,0x30 + imm, + 0x2f + imm,0x2e + imm,0x2d + imm,0x2c + imm,0x2b + imm,0x2a + imm,0x29 + imm,0x28 + imm, + 0x27 + imm,0x26 + imm,0x25 + imm,0x24 + imm,0x23 + imm,0x22 + imm,0x21 + imm,0x20 + imm, + 0x1f + imm,0x1e + imm,0x1d + imm,0x1c + imm,0x1b + imm,0x1a + imm,0x19 + imm,0x18 + imm, + 0x17 + imm,0x16 + imm,0x15 + imm,0x14 + imm,0x13 + imm,0x12 + imm,0x11 + imm,0x10 + imm, + 0x0f + imm,0x0e + imm,0x0d + imm,0x0c + imm,0x0b + imm,0x0a + imm,0x09 + imm,0x08 + imm, + 0x07 + imm,0x06 + imm,0x05 + imm,0x04 + imm,0x03 + imm,0x02 + imm,0x01 + imm,0x00 + imm), a.val)); +#else + return v_rotate_right(a, v512_setzero_s8()); +#endif +} +template +inline v_int8x64 v_rotate_left(const v_int8x64& a) +{ + if (imm == 0) return a; + if (imm >= 64) return v_int8x64(); +#if CV_AVX_512VBMI + return v_int8x64(_mm512_maskz_permutexvar_epi8(0xFFFFFFFFFFFFFFFF << imm, + _v512_set_epi8(0x3f - imm,0x3e - imm,0x3d - imm,0x3c - imm,0x3b - imm,0x3a - imm,0x39 - imm,0x38 - imm, + 0x37 - imm,0x36 - imm,0x35 - imm,0x34 - imm,0x33 - imm,0x32 - imm,0x31 - imm,0x30 - imm, + 0x2f - imm,0x2e - imm,0x2d - imm,0x2c - imm,0x2b - imm,0x2a - imm,0x29 - imm,0x28 - imm, + 0x27 - imm,0x26 - imm,0x25 - imm,0x24 - imm,0x23 - imm,0x22 - imm,0x21 - imm,0x20 - imm, + 0x1f - imm,0x1e - imm,0x1d - imm,0x1c - imm,0x1b - imm,0x1a - imm,0x19 - imm,0x18 - imm, + 0x17 - imm,0x16 - imm,0x15 - imm,0x14 - imm,0x13 - imm,0x12 - imm,0x11 - imm,0x10 - imm, + 0x0f - imm,0x0e - imm,0x0d - imm,0x0c - imm,0x0b - imm,0x0a - imm,0x09 - imm,0x08 - imm, + 0x07 - imm,0x06 - imm,0x05 - imm,0x04 - imm,0x03 - imm,0x02 - imm,0x01 - imm,0x00 - imm), a.val)); +#else + return v_rotate_right<64 - imm>(v512_setzero_s8(), a); +#endif +} + +#define OPENCV_HAL_IMPL_AVX512_ROTATE_PM(_Tpvec, suffix) \ +template inline _Tpvec v_rotate_left(const _Tpvec& a, const _Tpvec& b) \ +{ return v_reinterpret_as_##suffix(v_rotate_left(v_reinterpret_as_s8(a), v_reinterpret_as_s8(b))); } \ +template inline _Tpvec v_rotate_right(const _Tpvec& a, const _Tpvec& b) \ +{ return v_reinterpret_as_##suffix(v_rotate_right(v_reinterpret_as_s8(a), v_reinterpret_as_s8(b))); } \ +template inline _Tpvec v_rotate_left(const _Tpvec& a) \ +{ return v_reinterpret_as_##suffix(v_rotate_left(v_reinterpret_as_s8(a))); } \ +template inline _Tpvec v_rotate_right(const _Tpvec& a) \ +{ return v_reinterpret_as_##suffix(v_rotate_right(v_reinterpret_as_s8(a))); } + +#define OPENCV_HAL_IMPL_AVX512_ROTATE_EC(_Tpvec, suffix) \ +template \ +inline _Tpvec v_rotate_left(const _Tpvec& a, const _Tpvec& b) \ +{ \ + enum { SHIFT2 = (_Tpvec::nlanes - imm) }; \ + enum { MASK = ((1 << _Tpvec::nlanes) - 1) }; \ + if (imm == 0) return a; \ + if (imm == _Tpvec::nlanes) return b; \ + if (imm >= 2*_Tpvec::nlanes) return _Tpvec::zero(); \ + return _Tpvec(_mm512_mask_expand_##suffix(_mm512_maskz_compress_##suffix((MASK << SHIFT2)&MASK, b.val), (MASK << (imm))&MASK, a.val)); \ +} \ +template \ +inline _Tpvec v_rotate_right(const _Tpvec& a, const _Tpvec& b) \ +{ \ + enum { SHIFT2 = (_Tpvec::nlanes - imm) }; \ + enum { MASK = ((1 << _Tpvec::nlanes) - 1) }; \ + if (imm == 0) return a; \ + if (imm == _Tpvec::nlanes) return b; \ + if (imm >= 2*_Tpvec::nlanes) return _Tpvec::zero(); \ + return _Tpvec(_mm512_mask_expand_##suffix(_mm512_maskz_compress_##suffix((MASK << (imm))&MASK, a.val), (MASK << SHIFT2)&MASK, b.val)); \ +} \ +template \ +inline _Tpvec v_rotate_left(const _Tpvec& a) \ +{ \ + if (imm == 0) return a; \ + if (imm >= _Tpvec::nlanes) return _Tpvec::zero(); \ + return _Tpvec(_mm512_maskz_expand_##suffix((1 << _Tpvec::nlanes) - (1 << (imm)), a.val)); \ +} \ +template \ +inline _Tpvec v_rotate_right(const _Tpvec& a) \ +{ \ + if (imm == 0) return a; \ + if (imm >= _Tpvec::nlanes) return _Tpvec::zero(); \ + return _Tpvec(_mm512_maskz_compress_##suffix((1 << _Tpvec::nlanes) - (1 << (imm)), a.val)); \ +} + +OPENCV_HAL_IMPL_AVX512_ROTATE_PM(v_uint8x64, u8) +OPENCV_HAL_IMPL_AVX512_ROTATE_PM(v_uint16x32, u16) +OPENCV_HAL_IMPL_AVX512_ROTATE_PM(v_int16x32, s16) +OPENCV_HAL_IMPL_AVX512_ROTATE_EC(v_uint32x16, epi32) +OPENCV_HAL_IMPL_AVX512_ROTATE_EC(v_int32x16, epi32) +OPENCV_HAL_IMPL_AVX512_ROTATE_EC(v_uint64x8, epi64) +OPENCV_HAL_IMPL_AVX512_ROTATE_EC(v_int64x8, epi64) +OPENCV_HAL_IMPL_AVX512_ROTATE_EC(v_float32x16, ps) +OPENCV_HAL_IMPL_AVX512_ROTATE_EC(v_float64x8, pd) + +/** Reverse **/ +inline v_uint8x64 v_reverse(const v_uint8x64 &a) +{ +#if CV_AVX_512VBMI + static const __m512i perm = _mm512_set_epi32( + 0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f, + 0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f, + 0x20212223, 0x24252627, 0x28292a2b, 0x2c2d2e2f, + 0x30313233, 0x34353637, 0x38393a3b, 0x3c3d3e3f); + return v_uint8x64(_mm512_permutexvar_epi8(perm, a.val)); +#else + static const __m512i shuf = _mm512_set_epi32( + 0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f, + 0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f, + 0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f, + 0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f); + static const __m512i perm = _mm512_set_epi64(1, 0, 3, 2, 5, 4, 7, 6); + __m512i vec = _mm512_shuffle_epi8(a.val, shuf); + return v_uint8x64(_mm512_permutexvar_epi64(perm, vec)); +#endif +} + +inline v_int8x64 v_reverse(const v_int8x64 &a) +{ return v_reinterpret_as_s8(v_reverse(v_reinterpret_as_u8(a))); } + +inline v_uint16x32 v_reverse(const v_uint16x32 &a) +{ +#if CV_AVX_512VBMI + static const __m512i perm = _mm512_set_epi32( + 0x00000001, 0x00020003, 0x00040005, 0x00060007, + 0x00080009, 0x000a000b, 0x000c000d, 0x000e000f, + 0x00100011, 0x00120013, 0x00140015, 0x00160017, + 0x00180019, 0x001a001b, 0x001c001d, 0x001e001f); + return v_uint16x32(_mm512_permutexvar_epi16(perm, a.val)); +#else + static const __m512i shuf = _mm512_set_epi32( + 0x01000302, 0x05040706, 0x09080b0a, 0x0d0c0f0e, + 0x01000302, 0x05040706, 0x09080b0a, 0x0d0c0f0e, + 0x01000302, 0x05040706, 0x09080b0a, 0x0d0c0f0e, + 0x01000302, 0x05040706, 0x09080b0a, 0x0d0c0f0e); + static const __m512i perm = _mm512_set_epi64(1, 0, 3, 2, 5, 4, 7, 6); + __m512i vec = _mm512_shuffle_epi8(a.val, shuf); + return v_uint16x32(_mm512_permutexvar_epi64(perm, vec)); +#endif +} + +inline v_int16x32 v_reverse(const v_int16x32 &a) +{ return v_reinterpret_as_s16(v_reverse(v_reinterpret_as_u16(a))); } + +inline v_uint32x16 v_reverse(const v_uint32x16 &a) +{ + static const __m512i perm = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,14, 15); + return v_uint32x16(_mm512_permutexvar_epi32(perm, a.val)); +} + +inline v_int32x16 v_reverse(const v_int32x16 &a) +{ return v_reinterpret_as_s32(v_reverse(v_reinterpret_as_u32(a))); } + +inline v_float32x16 v_reverse(const v_float32x16 &a) +{ return v_reinterpret_as_f32(v_reverse(v_reinterpret_as_u32(a))); } + +inline v_uint64x8 v_reverse(const v_uint64x8 &a) +{ + static const __m512i perm = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7); + return v_uint64x8(_mm512_permutexvar_epi64(perm, a.val)); +} + +inline v_int64x8 v_reverse(const v_int64x8 &a) +{ return v_reinterpret_as_s64(v_reverse(v_reinterpret_as_u64(a))); } + +inline v_float64x8 v_reverse(const v_float64x8 &a) +{ return v_reinterpret_as_f64(v_reverse(v_reinterpret_as_u64(a))); } + +////////// Reduce ///////// + +/** Reduce **/ +#define OPENCV_HAL_IMPL_AVX512_REDUCE_ADD64(a, b) a + b +#define OPENCV_HAL_IMPL_AVX512_REDUCE_8(sctype, func, _Tpvec, ifunc, scop) \ + inline sctype v_reduce_##func(const _Tpvec& a) \ + { __m256i half = _mm256_##ifunc(_v512_extract_low(a.val), _v512_extract_high(a.val)); \ + sctype CV_DECL_ALIGNED(64) idx[2]; \ + _mm_store_si128((__m128i*)idx, _mm_##ifunc(_mm256_castsi256_si128(half), _mm256_extracti128_si256(half, 1))); \ + return scop(idx[0], idx[1]); } +OPENCV_HAL_IMPL_AVX512_REDUCE_8(uint64, min, v_uint64x8, min_epu64, min) +OPENCV_HAL_IMPL_AVX512_REDUCE_8(uint64, max, v_uint64x8, max_epu64, max) +OPENCV_HAL_IMPL_AVX512_REDUCE_8(uint64, sum, v_uint64x8, add_epi64, OPENCV_HAL_IMPL_AVX512_REDUCE_ADD64) +OPENCV_HAL_IMPL_AVX512_REDUCE_8(int64, min, v_int64x8, min_epi64, min) +OPENCV_HAL_IMPL_AVX512_REDUCE_8(int64, max, v_int64x8, max_epi64, max) +OPENCV_HAL_IMPL_AVX512_REDUCE_8(int64, sum, v_int64x8, add_epi64, OPENCV_HAL_IMPL_AVX512_REDUCE_ADD64) + +#define OPENCV_HAL_IMPL_AVX512_REDUCE_8F(func, ifunc, scop) \ + inline double v_reduce_##func(const v_float64x8& a) \ + { __m256d half = _mm256_##ifunc(_v512_extract_low(a.val), _v512_extract_high(a.val)); \ + double CV_DECL_ALIGNED(64) idx[2]; \ + _mm_store_pd(idx, _mm_##ifunc(_mm256_castpd256_pd128(half), _mm256_extractf128_pd(half, 1))); \ + return scop(idx[0], idx[1]); } +OPENCV_HAL_IMPL_AVX512_REDUCE_8F(min, min_pd, min) +OPENCV_HAL_IMPL_AVX512_REDUCE_8F(max, max_pd, max) +OPENCV_HAL_IMPL_AVX512_REDUCE_8F(sum, add_pd, OPENCV_HAL_IMPL_AVX512_REDUCE_ADD64) + +#define OPENCV_HAL_IMPL_AVX512_REDUCE_16(sctype, func, _Tpvec, ifunc) \ + inline sctype v_reduce_##func(const _Tpvec& a) \ + { __m256i half = _mm256_##ifunc(_v512_extract_low(a.val), _v512_extract_high(a.val)); \ + __m128i quarter = _mm_##ifunc(_mm256_castsi256_si128(half), _mm256_extracti128_si256(half, 1)); \ + quarter = _mm_##ifunc(quarter, _mm_srli_si128(quarter, 8)); \ + quarter = _mm_##ifunc(quarter, _mm_srli_si128(quarter, 4)); \ + return (sctype)_mm_cvtsi128_si32(quarter); } +OPENCV_HAL_IMPL_AVX512_REDUCE_16(uint, min, v_uint32x16, min_epu32) +OPENCV_HAL_IMPL_AVX512_REDUCE_16(uint, max, v_uint32x16, max_epu32) +OPENCV_HAL_IMPL_AVX512_REDUCE_16(int, min, v_int32x16, min_epi32) +OPENCV_HAL_IMPL_AVX512_REDUCE_16(int, max, v_int32x16, max_epi32) + +#define OPENCV_HAL_IMPL_AVX512_REDUCE_16F(func, ifunc) \ + inline float v_reduce_##func(const v_float32x16& a) \ + { __m256 half = _mm256_##ifunc(_v512_extract_low(a.val), _v512_extract_high(a.val)); \ + __m128 quarter = _mm_##ifunc(_mm256_castps256_ps128(half), _mm256_extractf128_ps(half, 1)); \ + quarter = _mm_##ifunc(quarter, _mm_permute_ps(quarter, _MM_SHUFFLE(0, 0, 3, 2))); \ + quarter = _mm_##ifunc(quarter, _mm_permute_ps(quarter, _MM_SHUFFLE(0, 0, 0, 1))); \ + return _mm_cvtss_f32(quarter); } +OPENCV_HAL_IMPL_AVX512_REDUCE_16F(min, min_ps) +OPENCV_HAL_IMPL_AVX512_REDUCE_16F(max, max_ps) + +inline float v_reduce_sum(const v_float32x16& a) +{ + __m256 half = _mm256_add_ps(_v512_extract_low(a.val), _v512_extract_high(a.val)); + __m128 quarter = _mm_add_ps(_mm256_castps256_ps128(half), _mm256_extractf128_ps(half, 1)); + quarter = _mm_hadd_ps(quarter, quarter); + return _mm_cvtss_f32(_mm_hadd_ps(quarter, quarter)); +} +inline int v_reduce_sum(const v_int32x16& a) +{ + __m256i half = _mm256_add_epi32(_v512_extract_low(a.val), _v512_extract_high(a.val)); + __m128i quarter = _mm_add_epi32(_mm256_castsi256_si128(half), _mm256_extracti128_si256(half, 1)); + quarter = _mm_hadd_epi32(quarter, quarter); + return _mm_cvtsi128_si32(_mm_hadd_epi32(quarter, quarter)); +} +inline uint v_reduce_sum(const v_uint32x16& a) +{ return (uint)v_reduce_sum(v_reinterpret_as_s32(a)); } + +#define OPENCV_HAL_IMPL_AVX512_REDUCE_32(sctype, func, _Tpvec, ifunc) \ + inline sctype v_reduce_##func(const _Tpvec& a) \ + { __m256i half = _mm256_##ifunc(_v512_extract_low(a.val), _v512_extract_high(a.val)); \ + __m128i quarter = _mm_##ifunc(_mm256_castsi256_si128(half), _mm256_extracti128_si256(half, 1)); \ + quarter = _mm_##ifunc(quarter, _mm_srli_si128(quarter, 8)); \ + quarter = _mm_##ifunc(quarter, _mm_srli_si128(quarter, 4)); \ + quarter = _mm_##ifunc(quarter, _mm_srli_si128(quarter, 2)); \ + return (sctype)_mm_cvtsi128_si32(quarter); } +OPENCV_HAL_IMPL_AVX512_REDUCE_32(ushort, min, v_uint16x32, min_epu16) +OPENCV_HAL_IMPL_AVX512_REDUCE_32(ushort, max, v_uint16x32, max_epu16) +OPENCV_HAL_IMPL_AVX512_REDUCE_32(short, min, v_int16x32, min_epi16) +OPENCV_HAL_IMPL_AVX512_REDUCE_32(short, max, v_int16x32, max_epi16) + +inline int v_reduce_sum(const v_int16x32& a) +{ return v_reduce_sum(v_expand_low(a) + v_expand_high(a)); } +inline uint v_reduce_sum(const v_uint16x32& a) +{ return v_reduce_sum(v_expand_low(a) + v_expand_high(a)); } + +#define OPENCV_HAL_IMPL_AVX512_REDUCE_64(sctype, func, _Tpvec, ifunc) \ + inline sctype v_reduce_##func(const _Tpvec& a) \ + { __m256i half = _mm256_##ifunc(_v512_extract_low(a.val), _v512_extract_high(a.val)); \ + __m128i quarter = _mm_##ifunc(_mm256_castsi256_si128(half), _mm256_extracti128_si256(half, 1)); \ + quarter = _mm_##ifunc(quarter, _mm_srli_si128(quarter, 8)); \ + quarter = _mm_##ifunc(quarter, _mm_srli_si128(quarter, 4)); \ + quarter = _mm_##ifunc(quarter, _mm_srli_si128(quarter, 2)); \ + quarter = _mm_##ifunc(quarter, _mm_srli_si128(quarter, 1)); \ + return (sctype)_mm_cvtsi128_si32(quarter); } +OPENCV_HAL_IMPL_AVX512_REDUCE_64(uchar, min, v_uint8x64, min_epu8) +OPENCV_HAL_IMPL_AVX512_REDUCE_64(uchar, max, v_uint8x64, max_epu8) +OPENCV_HAL_IMPL_AVX512_REDUCE_64(schar, min, v_int8x64, min_epi8) +OPENCV_HAL_IMPL_AVX512_REDUCE_64(schar, max, v_int8x64, max_epi8) + +#define OPENCV_HAL_IMPL_AVX512_REDUCE_64_SUM(sctype, _Tpvec, suffix) \ + inline sctype v_reduce_sum(const _Tpvec& a) \ + { __m512i a16 = _mm512_add_epi16(_mm512_cvt##suffix##_epi16(_v512_extract_low(a.val)), \ + _mm512_cvt##suffix##_epi16(_v512_extract_high(a.val))); \ + a16 = _mm512_cvtepi16_epi32(_mm256_add_epi16(_v512_extract_low(a16), _v512_extract_high(a16))); \ + __m256i a8 = _mm256_add_epi32(_v512_extract_low(a16), _v512_extract_high(a16)); \ + __m128i a4 = _mm_add_epi32(_mm256_castsi256_si128(a8), _mm256_extracti128_si256(a8, 1)); \ + a4 = _mm_hadd_epi32(a4, a4); \ + return (sctype)_mm_cvtsi128_si32(_mm_hadd_epi32(a4, a4)); } +OPENCV_HAL_IMPL_AVX512_REDUCE_64_SUM(uint, v_uint8x64, epu8) +OPENCV_HAL_IMPL_AVX512_REDUCE_64_SUM(int, v_int8x64, epi8) + +inline v_float32x16 v_reduce_sum4(const v_float32x16& a, const v_float32x16& b, + const v_float32x16& c, const v_float32x16& d) +{ + __m256 abl = _mm256_hadd_ps(_v512_extract_low(a.val), _v512_extract_low(b.val)); + __m256 abh = _mm256_hadd_ps(_v512_extract_high(a.val), _v512_extract_high(b.val)); + __m256 cdl = _mm256_hadd_ps(_v512_extract_low(c.val), _v512_extract_low(d.val)); + __m256 cdh = _mm256_hadd_ps(_v512_extract_high(c.val), _v512_extract_high(d.val)); + return v_float32x16(_v512_combine(_mm256_hadd_ps(abl, cdl), _mm256_hadd_ps(abh, cdh))); +} + +inline unsigned v_reduce_sad(const v_uint8x64& a, const v_uint8x64& b) +{ + __m512i val = _mm512_sad_epu8(a.val, b.val); + __m256i half = _mm256_add_epi32(_v512_extract_low(val), _v512_extract_high(val)); + __m128i quarter = _mm_add_epi32(_mm256_castsi256_si128(half), _mm256_extracti128_si256(half, 1)); + return (unsigned)_mm_cvtsi128_si32(_mm_add_epi32(quarter, _mm_unpackhi_epi64(quarter, quarter))); +} +inline unsigned v_reduce_sad(const v_int8x64& a, const v_int8x64& b) +{ + __m512i val = _mm512_set1_epi8(-128); + val = _mm512_sad_epu8(_mm512_add_epi8(a.val, val), _mm512_add_epi8(b.val, val)); + __m256i half = _mm256_add_epi32(_v512_extract_low(val), _v512_extract_high(val)); + __m128i quarter = _mm_add_epi32(_mm256_castsi256_si128(half), _mm256_extracti128_si256(half, 1)); + return (unsigned)_mm_cvtsi128_si32(_mm_add_epi32(quarter, _mm_unpackhi_epi64(quarter, quarter))); +} +inline unsigned v_reduce_sad(const v_uint16x32& a, const v_uint16x32& b) +{ return v_reduce_sum(v_add_wrap(a - b, b - a)); } +inline unsigned v_reduce_sad(const v_int16x32& a, const v_int16x32& b) +{ return v_reduce_sum(v_reinterpret_as_u16(v_sub_wrap(v_max(a, b), v_min(a, b)))); } +inline unsigned v_reduce_sad(const v_uint32x16& a, const v_uint32x16& b) +{ return v_reduce_sum(v_max(a, b) - v_min(a, b)); } +inline unsigned v_reduce_sad(const v_int32x16& a, const v_int32x16& b) +{ return v_reduce_sum(v_reinterpret_as_u32(v_max(a, b) - v_min(a, b))); } +inline float v_reduce_sad(const v_float32x16& a, const v_float32x16& b) +{ return v_reduce_sum((a - b) & v_float32x16(_mm512_castsi512_ps(_mm512_set1_epi32(0x7fffffff)))); } +inline double v_reduce_sad(const v_float64x8& a, const v_float64x8& b) +{ return v_reduce_sum((a - b) & v_float64x8(_mm512_castsi512_pd(_mm512_set1_epi64(0x7fffffffffffffff)))); } + +/** Popcount **/ +inline v_uint8x64 v_popcount(const v_int8x64& a) +{ +#if CV_AVX_512BITALG + return v_uint8x64(_mm512_popcnt_epi8(a.val)); +#elif CV_AVX_512VBMI + __m512i _popcnt_table0 = _v512_set_epu8(7, 6, 6, 5, 6, 5, 5, 4, 6, 5, 5, 4, 5, 4, 4, 3, + 5, 4, 4, 3, 4, 3, 3, 2, 4, 3, 3, 2, 3, 2, 2, 1, + 5, 4, 4, 3, 4, 3, 3, 2, 4, 3, 3, 2, 3, 2, 2, 1, + 4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0); + __m512i _popcnt_table1 = _v512_set_epu8(7, 6, 6, 5, 6, 5, 5, 4, 6, 5, 5, 4, 5, 4, 4, 3, + 6, 5, 5, 4, 5, 4, 4, 3, 5, 4, 4, 3, 4, 3, 3, 2, + 6, 5, 5, 4, 5, 4, 4, 3, 5, 4, 4, 3, 4, 3, 3, 2, + 5, 4, 4, 3, 4, 3, 3, 2, 4, 3, 3, 2, 3, 2, 2, 1); + return v_uint8x64(_mm512_sub_epi8(_mm512_permutex2var_epi8(_popcnt_table0, a.val, _popcnt_table1), _mm512_movm_epi8(_mm512_movepi8_mask(a.val)))); +#else + __m512i _popcnt_table = _mm512_set4_epi32(0x04030302, 0x03020201, 0x03020201, 0x02010100); + __m512i _popcnt_mask = _mm512_set1_epi8(0x0F); + + return v_uint8x64(_mm512_add_epi8(_mm512_shuffle_epi8(_popcnt_table, _mm512_and_si512( a.val, _popcnt_mask)), + _mm512_shuffle_epi8(_popcnt_table, _mm512_and_si512(_mm512_srli_epi16(a.val, 4), _popcnt_mask)))); +#endif +} +inline v_uint16x32 v_popcount(const v_int16x32& a) +{ +#if CV_AVX_512BITALG + return v_uint16x32(_mm512_popcnt_epi16(a.val)); +#elif CV_AVX_512VPOPCNTDQ + __m512i zero = _mm512_setzero_si512(); + return v_uint16x32(_mm512_packs_epi32(_mm512_popcnt_epi32(_mm512_unpacklo_epi16(a.val, zero)), + _mm512_popcnt_epi32(_mm512_unpackhi_epi16(a.val, zero)))); +#else + v_uint8x64 p = v_popcount(v_reinterpret_as_s8(a)); + p += v_rotate_right<1>(p); + return v_reinterpret_as_u16(p) & v512_setall_u16(0x00ff); +#endif +} +inline v_uint32x16 v_popcount(const v_int32x16& a) +{ +#if CV_AVX_512VPOPCNTDQ + return v_uint32x16(_mm512_popcnt_epi32(a.val)); +#else + v_uint8x64 p = v_popcount(v_reinterpret_as_s8(a)); + p += v_rotate_right<1>(p); + p += v_rotate_right<2>(p); + return v_reinterpret_as_u32(p) & v512_setall_u32(0x000000ff); +#endif +} +inline v_uint64x8 v_popcount(const v_int64x8& a) +{ +#if CV_AVX_512VPOPCNTDQ + return v_uint64x8(_mm512_popcnt_epi64(a.val)); +#else + return v_uint64x8(_mm512_sad_epu8(v_popcount(v_reinterpret_as_s8(a)).val, _mm512_setzero_si512())); +#endif +} + + +inline v_uint8x64 v_popcount(const v_uint8x64& a) { return v_popcount(v_reinterpret_as_s8 (a)); } +inline v_uint16x32 v_popcount(const v_uint16x32& a) { return v_popcount(v_reinterpret_as_s16(a)); } +inline v_uint32x16 v_popcount(const v_uint32x16& a) { return v_popcount(v_reinterpret_as_s32(a)); } +inline v_uint64x8 v_popcount(const v_uint64x8& a) { return v_popcount(v_reinterpret_as_s64(a)); } + + +////////// Other math ///////// + +/** Some frequent operations **/ +#define OPENCV_HAL_IMPL_AVX512_MULADD(_Tpvec, suffix) \ + inline _Tpvec v_fma(const _Tpvec& a, const _Tpvec& b, const _Tpvec& c) \ + { return _Tpvec(_mm512_fmadd_##suffix(a.val, b.val, c.val)); } \ + inline _Tpvec v_muladd(const _Tpvec& a, const _Tpvec& b, const _Tpvec& c) \ + { return _Tpvec(_mm512_fmadd_##suffix(a.val, b.val, c.val)); } \ + inline _Tpvec v_sqrt(const _Tpvec& x) \ + { return _Tpvec(_mm512_sqrt_##suffix(x.val)); } \ + inline _Tpvec v_sqr_magnitude(const _Tpvec& a, const _Tpvec& b) \ + { return v_fma(a, a, b * b); } \ + inline _Tpvec v_magnitude(const _Tpvec& a, const _Tpvec& b) \ + { return v_sqrt(v_fma(a, a, b * b)); } + +OPENCV_HAL_IMPL_AVX512_MULADD(v_float32x16, ps) +OPENCV_HAL_IMPL_AVX512_MULADD(v_float64x8, pd) + +inline v_int32x16 v_fma(const v_int32x16& a, const v_int32x16& b, const v_int32x16& c) +{ return a * b + c; } +inline v_int32x16 v_muladd(const v_int32x16& a, const v_int32x16& b, const v_int32x16& c) +{ return v_fma(a, b, c); } + +inline v_float32x16 v_invsqrt(const v_float32x16& x) +{ +#if CV_AVX_512ER + return v_float32x16(_mm512_rsqrt28_ps(x.val)); +#else + v_float32x16 half = x * v512_setall_f32(0.5); + v_float32x16 t = v_float32x16(_mm512_rsqrt14_ps(x.val)); + t *= v512_setall_f32(1.5) - ((t * t) * half); + return t; +#endif +} + +inline v_float64x8 v_invsqrt(const v_float64x8& x) +{ +#if CV_AVX_512ER + return v_float64x8(_mm512_rsqrt28_pd(x.val)); +#else + return v512_setall_f64(1.) / v_sqrt(x); +// v_float64x8 half = x * v512_setall_f64(0.5); +// v_float64x8 t = v_float64x8(_mm512_rsqrt14_pd(x.val)); +// t *= v512_setall_f64(1.5) - ((t * t) * half); +// t *= v512_setall_f64(1.5) - ((t * t) * half); +// return t; +#endif +} + +/** Absolute values **/ +#define OPENCV_HAL_IMPL_AVX512_ABS(_Tpvec, _Tpuvec, suffix) \ + inline _Tpuvec v_abs(const _Tpvec& x) \ + { return _Tpuvec(_mm512_abs_##suffix(x.val)); } + +OPENCV_HAL_IMPL_AVX512_ABS(v_int8x64, v_uint8x64, epi8) +OPENCV_HAL_IMPL_AVX512_ABS(v_int16x32, v_uint16x32, epi16) +OPENCV_HAL_IMPL_AVX512_ABS(v_int32x16, v_uint32x16, epi32) +OPENCV_HAL_IMPL_AVX512_ABS(v_int64x8, v_uint64x8, epi64) + +inline v_float32x16 v_abs(const v_float32x16& x) +{ +#ifdef _mm512_abs_pd + return v_float32x16(_mm512_abs_ps(x.val)); +#else + return v_float32x16(_mm512_castsi512_ps(_mm512_and_si512(_mm512_castps_si512(x.val), + _v512_set_epu64(0x7FFFFFFF7FFFFFFF, 0x7FFFFFFF7FFFFFFF, 0x7FFFFFFF7FFFFFFF, 0x7FFFFFFF7FFFFFFF, + 0x7FFFFFFF7FFFFFFF, 0x7FFFFFFF7FFFFFFF, 0x7FFFFFFF7FFFFFFF, 0x7FFFFFFF7FFFFFFF)))); +#endif +} + +inline v_float64x8 v_abs(const v_float64x8& x) +{ +#ifdef _mm512_abs_pd + #if defined __GNUC__ && (__GNUC__ < 7 || (__GNUC__ == 7 && __GNUC_MINOR__ <= 3) || (__GNUC__ == 8 && __GNUC_MINOR__ <= 2)) + // Workaround for https://gcc.gnu.org/bugzilla/show_bug.cgi?id=87476 + return v_float64x8(_mm512_abs_pd(_mm512_castpd_ps(x.val))); + #else + return v_float64x8(_mm512_abs_pd(x.val)); + #endif +#else + return v_float64x8(_mm512_castsi512_pd(_mm512_and_si512(_mm512_castpd_si512(x.val), + _v512_set_epu64(0x7FFFFFFFFFFFFFFF, 0x7FFFFFFFFFFFFFFF, 0x7FFFFFFFFFFFFFFF, 0x7FFFFFFFFFFFFFFF, + 0x7FFFFFFFFFFFFFFF, 0x7FFFFFFFFFFFFFFF, 0x7FFFFFFFFFFFFFFF, 0x7FFFFFFFFFFFFFFF)))); +#endif +} + +/** Absolute difference **/ +inline v_uint8x64 v_absdiff(const v_uint8x64& a, const v_uint8x64& b) +{ return v_add_wrap(a - b, b - a); } +inline v_uint16x32 v_absdiff(const v_uint16x32& a, const v_uint16x32& b) +{ return v_add_wrap(a - b, b - a); } +inline v_uint32x16 v_absdiff(const v_uint32x16& a, const v_uint32x16& b) +{ return v_max(a, b) - v_min(a, b); } + +inline v_uint8x64 v_absdiff(const v_int8x64& a, const v_int8x64& b) +{ + v_int8x64 d = v_sub_wrap(a, b); + v_int8x64 m = a < b; + return v_reinterpret_as_u8(v_sub_wrap(d ^ m, m)); +} + +inline v_uint16x32 v_absdiff(const v_int16x32& a, const v_int16x32& b) +{ return v_reinterpret_as_u16(v_sub_wrap(v_max(a, b), v_min(a, b))); } + +inline v_uint32x16 v_absdiff(const v_int32x16& a, const v_int32x16& b) +{ + v_int32x16 d = a - b; + v_int32x16 m = a < b; + return v_reinterpret_as_u32((d ^ m) - m); +} + +inline v_float32x16 v_absdiff(const v_float32x16& a, const v_float32x16& b) +{ return v_abs(a - b); } + +inline v_float64x8 v_absdiff(const v_float64x8& a, const v_float64x8& b) +{ return v_abs(a - b); } + +/** Saturating absolute difference **/ +inline v_int8x64 v_absdiffs(const v_int8x64& a, const v_int8x64& b) +{ + v_int8x64 d = a - b; + v_int8x64 m = a < b; + return (d ^ m) - m; +} +inline v_int16x32 v_absdiffs(const v_int16x32& a, const v_int16x32& b) +{ return v_max(a, b) - v_min(a, b); } + +////////// Conversions ///////// + +/** Rounding **/ +inline v_int32x16 v_round(const v_float32x16& a) +{ return v_int32x16(_mm512_cvtps_epi32(a.val)); } + +inline v_int32x16 v_round(const v_float64x8& a) +{ return v_int32x16(_mm512_castsi256_si512(_mm512_cvtpd_epi32(a.val))); } + +inline v_int32x16 v_round(const v_float64x8& a, const v_float64x8& b) +{ return v_int32x16(_v512_combine(_mm512_cvtpd_epi32(a.val), _mm512_cvtpd_epi32(b.val))); } + +inline v_int32x16 v_trunc(const v_float32x16& a) +{ return v_int32x16(_mm512_cvttps_epi32(a.val)); } + +inline v_int32x16 v_trunc(const v_float64x8& a) +{ return v_int32x16(_mm512_castsi256_si512(_mm512_cvttpd_epi32(a.val))); } + +#if CVT_ROUND_MODES_IMPLEMENTED +inline v_int32x16 v_floor(const v_float32x16& a) +{ return v_int32x16(_mm512_cvt_roundps_epi32(a.val, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC)); } + +inline v_int32x16 v_floor(const v_float64x8& a) +{ return v_int32x16(_mm512_castsi256_si512(_mm512_cvt_roundpd_epi32(a.val, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC))); } + +inline v_int32x16 v_ceil(const v_float32x16& a) +{ return v_int32x16(_mm512_cvt_roundps_epi32(a.val, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC)); } + +inline v_int32x16 v_ceil(const v_float64x8& a) +{ return v_int32x16(_mm512_castsi256_si512(_mm512_cvt_roundpd_epi32(a.val, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC))); } +#else +inline v_int32x16 v_floor(const v_float32x16& a) +{ return v_int32x16(_mm512_cvtps_epi32(_mm512_roundscale_ps(a.val, 1))); } + +inline v_int32x16 v_floor(const v_float64x8& a) +{ return v_int32x16(_mm512_castsi256_si512(_mm512_cvtpd_epi32(_mm512_roundscale_pd(a.val, 1)))); } + +inline v_int32x16 v_ceil(const v_float32x16& a) +{ return v_int32x16(_mm512_cvtps_epi32(_mm512_roundscale_ps(a.val, 2))); } + +inline v_int32x16 v_ceil(const v_float64x8& a) +{ return v_int32x16(_mm512_castsi256_si512(_mm512_cvtpd_epi32(_mm512_roundscale_pd(a.val, 2)))); } +#endif + +/** To float **/ +inline v_float32x16 v_cvt_f32(const v_int32x16& a) +{ return v_float32x16(_mm512_cvtepi32_ps(a.val)); } + +inline v_float32x16 v_cvt_f32(const v_float64x8& a) +{ return v_float32x16(_mm512_cvtpd_pslo(a.val)); } + +inline v_float32x16 v_cvt_f32(const v_float64x8& a, const v_float64x8& b) +{ return v_float32x16(_v512_combine(_mm512_cvtpd_ps(a.val), _mm512_cvtpd_ps(b.val))); } + +inline v_float64x8 v_cvt_f64(const v_int32x16& a) +{ return v_float64x8(_mm512_cvtepi32_pd(_v512_extract_low(a.val))); } + +inline v_float64x8 v_cvt_f64_high(const v_int32x16& a) +{ return v_float64x8(_mm512_cvtepi32_pd(_v512_extract_high(a.val))); } + +inline v_float64x8 v_cvt_f64(const v_float32x16& a) +{ return v_float64x8(_mm512_cvtps_pd(_v512_extract_low(a.val))); } + +inline v_float64x8 v_cvt_f64_high(const v_float32x16& a) +{ return v_float64x8(_mm512_cvtps_pd(_v512_extract_high(a.val))); } + +// from (Mysticial and wim) https://stackoverflow.com/q/41144668 +inline v_float64x8 v_cvt_f64(const v_int64x8& v) +{ +#if CV_AVX_512DQ + return v_float64x8(_mm512_cvtepi64_pd(v.val)); +#else + // constants encoded as floating-point + __m512i magic_i_lo = _mm512_set1_epi64(0x4330000000000000); // 2^52 + __m512i magic_i_hi32 = _mm512_set1_epi64(0x4530000080000000); // 2^84 + 2^63 + __m512i magic_i_all = _mm512_set1_epi64(0x4530000080100000); // 2^84 + 2^63 + 2^52 + __m512d magic_d_all = _mm512_castsi512_pd(magic_i_all); + + // Blend the 32 lowest significant bits of v with magic_int_lo + __m512i v_lo = _mm512_mask_blend_epi32(0x5555, magic_i_lo, v.val); + // Extract the 32 most significant bits of v + __m512i v_hi = _mm512_srli_epi64(v.val, 32); + // Flip the msb of v_hi and blend with 0x45300000 + v_hi = _mm512_xor_si512(v_hi, magic_i_hi32); + // Compute in double precision + __m512d v_hi_dbl = _mm512_sub_pd(_mm512_castsi512_pd(v_hi), magic_d_all); + // (v_hi - magic_d_all) + v_lo Do not assume associativity of floating point addition + __m512d result = _mm512_add_pd(v_hi_dbl, _mm512_castsi512_pd(v_lo)); + return v_float64x8(result); +#endif +} + +////////////// Lookup table access //////////////////// + +inline v_int8x64 v512_lut(const schar* tab, const int* idx) +{ + __m128i p0 = _mm512_cvtepi32_epi8(_mm512_i32gather_epi32(_mm512_loadu_si512((const __m512i*)idx ), (const int *)tab, 1)); + __m128i p1 = _mm512_cvtepi32_epi8(_mm512_i32gather_epi32(_mm512_loadu_si512((const __m512i*)idx + 1), (const int *)tab, 1)); + __m128i p2 = _mm512_cvtepi32_epi8(_mm512_i32gather_epi32(_mm512_loadu_si512((const __m512i*)idx + 2), (const int *)tab, 1)); + __m128i p3 = _mm512_cvtepi32_epi8(_mm512_i32gather_epi32(_mm512_loadu_si512((const __m512i*)idx + 3), (const int *)tab, 1)); + return v_int8x64(_mm512_inserti32x4(_mm512_inserti32x4(_mm512_inserti32x4(_mm512_castsi128_si512(p0), p1, 1), p2, 2), p3, 3)); +} +inline v_int8x64 v512_lut_pairs(const schar* tab, const int* idx) +{ + __m256i p0 = _mm512_cvtepi32_epi16(_mm512_i32gather_epi32(_mm512_loadu_si512((const __m512i*)idx ), (const int *)tab, 1)); + __m256i p1 = _mm512_cvtepi32_epi16(_mm512_i32gather_epi32(_mm512_loadu_si512((const __m512i*)idx + 1), (const int *)tab, 1)); + return v_int8x64(_v512_combine(p0, p1)); +} +inline v_int8x64 v512_lut_quads(const schar* tab, const int* idx) +{ + return v_int8x64(_mm512_i32gather_epi32(_mm512_loadu_si512((const __m512i*)idx), (const int *)tab, 1)); +} +inline v_uint8x64 v512_lut(const uchar* tab, const int* idx) { return v_reinterpret_as_u8(v512_lut((const schar *)tab, idx)); } +inline v_uint8x64 v512_lut_pairs(const uchar* tab, const int* idx) { return v_reinterpret_as_u8(v512_lut_pairs((const schar *)tab, idx)); } +inline v_uint8x64 v512_lut_quads(const uchar* tab, const int* idx) { return v_reinterpret_as_u8(v512_lut_quads((const schar *)tab, idx)); } + +inline v_int16x32 v512_lut(const short* tab, const int* idx) +{ + __m256i p0 = _mm512_cvtepi32_epi16(_mm512_i32gather_epi32(_mm512_loadu_si512((const __m512i*)idx ), (const int *)tab, 2)); + __m256i p1 = _mm512_cvtepi32_epi16(_mm512_i32gather_epi32(_mm512_loadu_si512((const __m512i*)idx + 1), (const int *)tab, 2)); + return v_int16x32(_v512_combine(p0, p1)); +} +inline v_int16x32 v512_lut_pairs(const short* tab, const int* idx) +{ + return v_int16x32(_mm512_i32gather_epi32(_mm512_loadu_si512((const __m512i*)idx), (const int *)tab, 2)); +} +inline v_int16x32 v512_lut_quads(const short* tab, const int* idx) +{ +#if defined(__GNUC__) + return v_int16x32(_mm512_i32gather_epi64(_mm256_loadu_si256((const __m256i*)idx), (const long long int*)tab, 2)); +#else + return v_int16x32(_mm512_i32gather_epi64(_mm256_loadu_si256((const __m256i*)idx), (const int64*)tab, 2)); +#endif +} +inline v_uint16x32 v512_lut(const ushort* tab, const int* idx) { return v_reinterpret_as_u16(v512_lut((const short *)tab, idx)); } +inline v_uint16x32 v512_lut_pairs(const ushort* tab, const int* idx) { return v_reinterpret_as_u16(v512_lut_pairs((const short *)tab, idx)); } +inline v_uint16x32 v512_lut_quads(const ushort* tab, const int* idx) { return v_reinterpret_as_u16(v512_lut_quads((const short *)tab, idx)); } + +inline v_int32x16 v512_lut(const int* tab, const int* idx) +{ + return v_int32x16(_mm512_i32gather_epi32(_mm512_loadu_si512((const __m512i*)idx), tab, 4)); +} +inline v_int32x16 v512_lut_pairs(const int* tab, const int* idx) +{ +#if defined(__GNUC__) + return v_int32x16(_mm512_i32gather_epi64(_mm256_loadu_si256((const __m256i*)idx), (const long long int*)tab, 4)); +#else + return v_int32x16(_mm512_i32gather_epi64(_mm256_loadu_si256((const __m256i*)idx), (const int64*)tab, 4)); +#endif +} +inline v_int32x16 v512_lut_quads(const int* tab, const int* idx) +{ + return v_int32x16(_mm512_inserti32x4(_mm512_inserti32x4(_mm512_inserti32x4(_mm512_castsi128_si512( + _mm_loadu_si128((const __m128i*)(tab + idx[0]))), + _mm_loadu_si128((const __m128i*)(tab + idx[1])), 1), + _mm_loadu_si128((const __m128i*)(tab + idx[2])), 2), + _mm_loadu_si128((const __m128i*)(tab + idx[3])), 3)); +} +inline v_uint32x16 v512_lut(const unsigned* tab, const int* idx) { return v_reinterpret_as_u32(v512_lut((const int *)tab, idx)); } +inline v_uint32x16 v512_lut_pairs(const unsigned* tab, const int* idx) { return v_reinterpret_as_u32(v512_lut_pairs((const int *)tab, idx)); } +inline v_uint32x16 v512_lut_quads(const unsigned* tab, const int* idx) { return v_reinterpret_as_u32(v512_lut_quads((const int *)tab, idx)); } + +inline v_int64x8 v512_lut(const int64* tab, const int* idx) +{ +#if defined(__GNUC__) + return v_int64x8(_mm512_i32gather_epi64(_mm256_loadu_si256((const __m256i*)idx), (const long long int*)tab, 8)); +#else + return v_int64x8(_mm512_i32gather_epi64(_mm256_loadu_si256((const __m256i*)idx), tab , 8)); +#endif +} +inline v_int64x8 v512_lut_pairs(const int64* tab, const int* idx) +{ + return v_int64x8(_mm512_inserti32x4(_mm512_inserti32x4(_mm512_inserti32x4(_mm512_castsi128_si512( + _mm_loadu_si128((const __m128i*)(tab + idx[0]))), + _mm_loadu_si128((const __m128i*)(tab + idx[1])), 1), + _mm_loadu_si128((const __m128i*)(tab + idx[2])), 2), + _mm_loadu_si128((const __m128i*)(tab + idx[3])), 3)); +} +inline v_uint64x8 v512_lut(const uint64* tab, const int* idx) { return v_reinterpret_as_u64(v512_lut((const int64 *)tab, idx)); } +inline v_uint64x8 v512_lut_pairs(const uint64* tab, const int* idx) { return v_reinterpret_as_u64(v512_lut_pairs((const int64 *)tab, idx)); } + +inline v_float32x16 v512_lut(const float* tab, const int* idx) +{ + return v_float32x16(_mm512_i32gather_ps(_mm512_loadu_si512((const __m512i*)idx), tab, 4)); +} +inline v_float32x16 v512_lut_pairs(const float* tab, const int* idx) { return v_reinterpret_as_f32(v512_lut_pairs((const int *)tab, idx)); } +inline v_float32x16 v512_lut_quads(const float* tab, const int* idx) { return v_reinterpret_as_f32(v512_lut_quads((const int *)tab, idx)); } + +inline v_float64x8 v512_lut(const double* tab, const int* idx) +{ + return v_float64x8(_mm512_i32gather_pd(_mm256_loadu_si256((const __m256i*)idx), tab, 8)); +} +inline v_float64x8 v512_lut_pairs(const double* tab, const int* idx) +{ + return v_float64x8(_mm512_insertf64x2(_mm512_insertf64x2(_mm512_insertf64x2(_mm512_castpd128_pd512( + _mm_loadu_pd(tab + idx[0])), + _mm_loadu_pd(tab + idx[1]), 1), + _mm_loadu_pd(tab + idx[2]), 2), + _mm_loadu_pd(tab + idx[3]), 3)); +} + +inline v_int32x16 v_lut(const int* tab, const v_int32x16& idxvec) +{ + return v_int32x16(_mm512_i32gather_epi32(idxvec.val, tab, 4)); +} + +inline v_uint32x16 v_lut(const unsigned* tab, const v_int32x16& idxvec) +{ + return v_reinterpret_as_u32(v_lut((const int *)tab, idxvec)); +} + +inline v_float32x16 v_lut(const float* tab, const v_int32x16& idxvec) +{ + return v_float32x16(_mm512_i32gather_ps(idxvec.val, tab, 4)); +} + +inline v_float64x8 v_lut(const double* tab, const v_int32x16& idxvec) +{ + return v_float64x8(_mm512_i32gather_pd(_v512_extract_low(idxvec.val), tab, 8)); +} + +inline void v_lut_deinterleave(const float* tab, const v_int32x16& idxvec, v_float32x16& x, v_float32x16& y) +{ + x.val = _mm512_i32gather_ps(idxvec.val, tab, 4); + y.val = _mm512_i32gather_ps(idxvec.val, &tab[1], 4); +} + +inline void v_lut_deinterleave(const double* tab, const v_int32x16& idxvec, v_float64x8& x, v_float64x8& y) +{ + x.val = _mm512_i32gather_pd(_v512_extract_low(idxvec.val), tab, 8); + y.val = _mm512_i32gather_pd(_v512_extract_low(idxvec.val), &tab[1], 8); +} + +inline v_int8x64 v_interleave_pairs(const v_int8x64& vec) +{ + return v_int8x64(_mm512_shuffle_epi8(vec.val, _mm512_set4_epi32(0x0f0d0e0c, 0x0b090a08, 0x07050604, 0x03010200))); +} +inline v_uint8x64 v_interleave_pairs(const v_uint8x64& vec) { return v_reinterpret_as_u8(v_interleave_pairs(v_reinterpret_as_s8(vec))); } +inline v_int8x64 v_interleave_quads(const v_int8x64& vec) +{ + return v_int8x64(_mm512_shuffle_epi8(vec.val, _mm512_set4_epi32(0x0f0b0e0a, 0x0d090c08, 0x07030602, 0x05010400))); +} +inline v_uint8x64 v_interleave_quads(const v_uint8x64& vec) { return v_reinterpret_as_u8(v_interleave_quads(v_reinterpret_as_s8(vec))); } + +inline v_int16x32 v_interleave_pairs(const v_int16x32& vec) +{ + return v_int16x32(_mm512_shuffle_epi8(vec.val, _mm512_set4_epi32(0x0f0e0b0a, 0x0d0c0908, 0x07060302, 0x05040100))); +} +inline v_uint16x32 v_interleave_pairs(const v_uint16x32& vec) { return v_reinterpret_as_u16(v_interleave_pairs(v_reinterpret_as_s16(vec))); } +inline v_int16x32 v_interleave_quads(const v_int16x32& vec) +{ + return v_int16x32(_mm512_shuffle_epi8(vec.val, _mm512_set4_epi32(0x0f0e0706, 0x0d0c0504, 0x0b0a0302, 0x09080100))); +} +inline v_uint16x32 v_interleave_quads(const v_uint16x32& vec) { return v_reinterpret_as_u16(v_interleave_quads(v_reinterpret_as_s16(vec))); } + +inline v_int32x16 v_interleave_pairs(const v_int32x16& vec) +{ + return v_int32x16(_mm512_shuffle_epi32(vec.val, _MM_PERM_ACBD)); +} +inline v_uint32x16 v_interleave_pairs(const v_uint32x16& vec) { return v_reinterpret_as_u32(v_interleave_pairs(v_reinterpret_as_s32(vec))); } +inline v_float32x16 v_interleave_pairs(const v_float32x16& vec) { return v_reinterpret_as_f32(v_interleave_pairs(v_reinterpret_as_s32(vec))); } + +inline v_int8x64 v_pack_triplets(const v_int8x64& vec) +{ + return v_int8x64(_mm512_permutexvar_epi32(_v512_set_epu64(0x0000000f0000000f, 0x0000000f0000000f, 0x0000000e0000000d, 0x0000000c0000000a, + 0x0000000900000008, 0x0000000600000005, 0x0000000400000002, 0x0000000100000000), + _mm512_shuffle_epi8(vec.val, _mm512_set4_epi32(0xffffff0f, 0x0e0d0c0a, 0x09080605, 0x04020100)))); +} +inline v_uint8x64 v_pack_triplets(const v_uint8x64& vec) { return v_reinterpret_as_u8(v_pack_triplets(v_reinterpret_as_s8(vec))); } + +inline v_int16x32 v_pack_triplets(const v_int16x32& vec) +{ + return v_int16x32(_mm512_permutexvar_epi16(_v512_set_epu64(0x001f001f001f001f, 0x001f001f001f001f, 0x001e001d001c001a, 0x0019001800160015, + 0x0014001200110010, 0x000e000d000c000a, 0x0009000800060005, 0x0004000200010000), vec.val)); +} +inline v_uint16x32 v_pack_triplets(const v_uint16x32& vec) { return v_reinterpret_as_u16(v_pack_triplets(v_reinterpret_as_s16(vec))); } + +inline v_int32x16 v_pack_triplets(const v_int32x16& vec) +{ + return v_int32x16(_mm512_permutexvar_epi32(_v512_set_epu64(0x0000000f0000000f, 0x0000000f0000000f, 0x0000000e0000000d, 0x0000000c0000000a, + 0x0000000900000008, 0x0000000600000005, 0x0000000400000002, 0x0000000100000000), vec.val)); +} +inline v_uint32x16 v_pack_triplets(const v_uint32x16& vec) { return v_reinterpret_as_u32(v_pack_triplets(v_reinterpret_as_s32(vec))); } +inline v_float32x16 v_pack_triplets(const v_float32x16& vec) +{ + return v_float32x16(_mm512_permutexvar_ps(_v512_set_epu64(0x0000000f0000000f, 0x0000000f0000000f, 0x0000000e0000000d, 0x0000000c0000000a, + 0x0000000900000008, 0x0000000600000005, 0x0000000400000002, 0x0000000100000000), vec.val)); +} + +////////// Matrix operations ///////// + +//////// Dot Product //////// + +// 16 >> 32 +inline v_int32x16 v_dotprod(const v_int16x32& a, const v_int16x32& b) +{ return v_int32x16(_mm512_madd_epi16(a.val, b.val)); } +inline v_int32x16 v_dotprod(const v_int16x32& a, const v_int16x32& b, const v_int32x16& c) +{ return v_dotprod(a, b) + c; } + +// 32 >> 64 +inline v_int64x8 v_dotprod(const v_int32x16& a, const v_int32x16& b) +{ + __m512i even = _mm512_mul_epi32(a.val, b.val); + __m512i odd = _mm512_mul_epi32(_mm512_srli_epi64(a.val, 32), _mm512_srli_epi64(b.val, 32)); + return v_int64x8(_mm512_add_epi64(even, odd)); +} +inline v_int64x8 v_dotprod(const v_int32x16& a, const v_int32x16& b, const v_int64x8& c) +{ return v_dotprod(a, b) + c; } + +// 8 >> 32 +inline v_uint32x16 v_dotprod_expand(const v_uint8x64& a, const v_uint8x64& b) +{ + __m512i even_a = _mm512_mask_blend_epi8(0xAAAAAAAAAAAAAAAA, a.val, _mm512_setzero_si512()); + __m512i odd_a = _mm512_srli_epi16(a.val, 8); + + __m512i even_b = _mm512_mask_blend_epi8(0xAAAAAAAAAAAAAAAA, b.val, _mm512_setzero_si512()); + __m512i odd_b = _mm512_srli_epi16(b.val, 8); + + __m512i prod0 = _mm512_madd_epi16(even_a, even_b); + __m512i prod1 = _mm512_madd_epi16(odd_a, odd_b); + return v_uint32x16(_mm512_add_epi32(prod0, prod1)); +} +inline v_uint32x16 v_dotprod_expand(const v_uint8x64& a, const v_uint8x64& b, const v_uint32x16& c) +{ return v_dotprod_expand(a, b) + c; } + +inline v_int32x16 v_dotprod_expand(const v_int8x64& a, const v_int8x64& b) +{ + __m512i even_a = _mm512_srai_epi16(_mm512_bslli_epi128(a.val, 1), 8); + __m512i odd_a = _mm512_srai_epi16(a.val, 8); + + __m512i even_b = _mm512_srai_epi16(_mm512_bslli_epi128(b.val, 1), 8); + __m512i odd_b = _mm512_srai_epi16(b.val, 8); + + __m512i prod0 = _mm512_madd_epi16(even_a, even_b); + __m512i prod1 = _mm512_madd_epi16(odd_a, odd_b); + return v_int32x16(_mm512_add_epi32(prod0, prod1)); +} +inline v_int32x16 v_dotprod_expand(const v_int8x64& a, const v_int8x64& b, const v_int32x16& c) +{ return v_dotprod_expand(a, b) + c; } + +// 16 >> 64 +inline v_uint64x8 v_dotprod_expand(const v_uint16x32& a, const v_uint16x32& b) +{ + __m512i mullo = _mm512_mullo_epi16(a.val, b.val); + __m512i mulhi = _mm512_mulhi_epu16(a.val, b.val); + __m512i mul0 = _mm512_unpacklo_epi16(mullo, mulhi); + __m512i mul1 = _mm512_unpackhi_epi16(mullo, mulhi); + + __m512i p02 = _mm512_mask_blend_epi32(0xAAAA, mul0, _mm512_setzero_si512()); + __m512i p13 = _mm512_srli_epi64(mul0, 32); + __m512i p46 = _mm512_mask_blend_epi32(0xAAAA, mul1, _mm512_setzero_si512()); + __m512i p57 = _mm512_srli_epi64(mul1, 32); + + __m512i p15_ = _mm512_add_epi64(p02, p13); + __m512i p9d_ = _mm512_add_epi64(p46, p57); + + return v_uint64x8(_mm512_add_epi64( + _mm512_unpacklo_epi64(p15_, p9d_), + _mm512_unpackhi_epi64(p15_, p9d_) + )); +} +inline v_uint64x8 v_dotprod_expand(const v_uint16x32& a, const v_uint16x32& b, const v_uint64x8& c) +{ return v_dotprod_expand(a, b) + c; } + +inline v_int64x8 v_dotprod_expand(const v_int16x32& a, const v_int16x32& b) +{ + __m512i prod = _mm512_madd_epi16(a.val, b.val); + __m512i even = _mm512_srai_epi64(_mm512_bslli_epi128(prod, 4), 32); + __m512i odd = _mm512_srai_epi64(prod, 32); + return v_int64x8(_mm512_add_epi64(even, odd)); +} +inline v_int64x8 v_dotprod_expand(const v_int16x32& a, const v_int16x32& b, const v_int64x8& c) +{ return v_dotprod_expand(a, b) + c; } + +// 32 >> 64f +inline v_float64x8 v_dotprod_expand(const v_int32x16& a, const v_int32x16& b) +{ return v_cvt_f64(v_dotprod(a, b)); } +inline v_float64x8 v_dotprod_expand(const v_int32x16& a, const v_int32x16& b, const v_float64x8& c) +{ return v_dotprod_expand(a, b) + c; } + +//////// Fast Dot Product //////// + +// 16 >> 32 +inline v_int32x16 v_dotprod_fast(const v_int16x32& a, const v_int16x32& b) +{ return v_dotprod(a, b); } +inline v_int32x16 v_dotprod_fast(const v_int16x32& a, const v_int16x32& b, const v_int32x16& c) +{ return v_dotprod(a, b, c); } + +// 32 >> 64 +inline v_int64x8 v_dotprod_fast(const v_int32x16& a, const v_int32x16& b) +{ return v_dotprod(a, b); } +inline v_int64x8 v_dotprod_fast(const v_int32x16& a, const v_int32x16& b, const v_int64x8& c) +{ return v_dotprod(a, b, c); } + +// 8 >> 32 +inline v_uint32x16 v_dotprod_expand_fast(const v_uint8x64& a, const v_uint8x64& b) +{ return v_dotprod_expand(a, b); } +inline v_uint32x16 v_dotprod_expand_fast(const v_uint8x64& a, const v_uint8x64& b, const v_uint32x16& c) +{ return v_dotprod_expand(a, b, c); } + +inline v_int32x16 v_dotprod_expand_fast(const v_int8x64& a, const v_int8x64& b) +{ return v_dotprod_expand(a, b); } +inline v_int32x16 v_dotprod_expand_fast(const v_int8x64& a, const v_int8x64& b, const v_int32x16& c) +{ return v_dotprod_expand(a, b, c); } + +// 16 >> 64 +inline v_uint64x8 v_dotprod_expand_fast(const v_uint16x32& a, const v_uint16x32& b) +{ + __m512i mullo = _mm512_mullo_epi16(a.val, b.val); + __m512i mulhi = _mm512_mulhi_epu16(a.val, b.val); + __m512i mul0 = _mm512_unpacklo_epi16(mullo, mulhi); + __m512i mul1 = _mm512_unpackhi_epi16(mullo, mulhi); + + __m512i p02 = _mm512_mask_blend_epi32(0xAAAA, mul0, _mm512_setzero_si512()); + __m512i p13 = _mm512_srli_epi64(mul0, 32); + __m512i p46 = _mm512_mask_blend_epi32(0xAAAA, mul1, _mm512_setzero_si512()); + __m512i p57 = _mm512_srli_epi64(mul1, 32); + + __m512i p15_ = _mm512_add_epi64(p02, p13); + __m512i p9d_ = _mm512_add_epi64(p46, p57); + return v_uint64x8(_mm512_add_epi64(p15_, p9d_)); +} +inline v_uint64x8 v_dotprod_expand_fast(const v_uint16x32& a, const v_uint16x32& b, const v_uint64x8& c) +{ return v_dotprod_expand_fast(a, b) + c; } + +inline v_int64x8 v_dotprod_expand_fast(const v_int16x32& a, const v_int16x32& b) +{ return v_dotprod_expand(a, b); } +inline v_int64x8 v_dotprod_expand_fast(const v_int16x32& a, const v_int16x32& b, const v_int64x8& c) +{ return v_dotprod_expand(a, b, c); } + +// 32 >> 64f +inline v_float64x8 v_dotprod_expand_fast(const v_int32x16& a, const v_int32x16& b) +{ return v_dotprod_expand(a, b); } +inline v_float64x8 v_dotprod_expand_fast(const v_int32x16& a, const v_int32x16& b, const v_float64x8& c) +{ return v_dotprod_expand(a, b) + c; } + + +#define OPENCV_HAL_AVX512_SPLAT2_PS(a, im) \ + v_float32x16(_mm512_permute_ps(a.val, _MM_SHUFFLE(im, im, im, im))) + +inline v_float32x16 v_matmul(const v_float32x16& v, + const v_float32x16& m0, const v_float32x16& m1, + const v_float32x16& m2, const v_float32x16& m3) +{ + v_float32x16 v04 = OPENCV_HAL_AVX512_SPLAT2_PS(v, 0); + v_float32x16 v15 = OPENCV_HAL_AVX512_SPLAT2_PS(v, 1); + v_float32x16 v26 = OPENCV_HAL_AVX512_SPLAT2_PS(v, 2); + v_float32x16 v37 = OPENCV_HAL_AVX512_SPLAT2_PS(v, 3); + return v_fma(v04, m0, v_fma(v15, m1, v_fma(v26, m2, v37 * m3))); +} + +inline v_float32x16 v_matmuladd(const v_float32x16& v, + const v_float32x16& m0, const v_float32x16& m1, + const v_float32x16& m2, const v_float32x16& a) +{ + v_float32x16 v04 = OPENCV_HAL_AVX512_SPLAT2_PS(v, 0); + v_float32x16 v15 = OPENCV_HAL_AVX512_SPLAT2_PS(v, 1); + v_float32x16 v26 = OPENCV_HAL_AVX512_SPLAT2_PS(v, 2); + return v_fma(v04, m0, v_fma(v15, m1, v_fma(v26, m2, a))); +} + +#define OPENCV_HAL_IMPL_AVX512_TRANSPOSE4x4(_Tpvec, suffix, cast_from, cast_to) \ + inline void v_transpose4x4(const _Tpvec& a0, const _Tpvec& a1, \ + const _Tpvec& a2, const _Tpvec& a3, \ + _Tpvec& b0, _Tpvec& b1, _Tpvec& b2, _Tpvec& b3) \ + { \ + __m512i t0 = cast_from(_mm512_unpacklo_##suffix(a0.val, a1.val)); \ + __m512i t1 = cast_from(_mm512_unpacklo_##suffix(a2.val, a3.val)); \ + __m512i t2 = cast_from(_mm512_unpackhi_##suffix(a0.val, a1.val)); \ + __m512i t3 = cast_from(_mm512_unpackhi_##suffix(a2.val, a3.val)); \ + b0.val = cast_to(_mm512_unpacklo_epi64(t0, t1)); \ + b1.val = cast_to(_mm512_unpackhi_epi64(t0, t1)); \ + b2.val = cast_to(_mm512_unpacklo_epi64(t2, t3)); \ + b3.val = cast_to(_mm512_unpackhi_epi64(t2, t3)); \ + } + +OPENCV_HAL_IMPL_AVX512_TRANSPOSE4x4(v_uint32x16, epi32, OPENCV_HAL_NOP, OPENCV_HAL_NOP) +OPENCV_HAL_IMPL_AVX512_TRANSPOSE4x4(v_int32x16, epi32, OPENCV_HAL_NOP, OPENCV_HAL_NOP) +OPENCV_HAL_IMPL_AVX512_TRANSPOSE4x4(v_float32x16, ps, _mm512_castps_si512, _mm512_castsi512_ps) + +//////////////// Value reordering /////////////// + +/* Expand */ +#define OPENCV_HAL_IMPL_AVX512_EXPAND(_Tpvec, _Tpwvec, _Tp, intrin) \ + inline void v_expand(const _Tpvec& a, _Tpwvec& b0, _Tpwvec& b1) \ + { \ + b0.val = intrin(_v512_extract_low(a.val)); \ + b1.val = intrin(_v512_extract_high(a.val)); \ + } \ + inline _Tpwvec v_expand_low(const _Tpvec& a) \ + { return _Tpwvec(intrin(_v512_extract_low(a.val))); } \ + inline _Tpwvec v_expand_high(const _Tpvec& a) \ + { return _Tpwvec(intrin(_v512_extract_high(a.val))); } \ + inline _Tpwvec v512_load_expand(const _Tp* ptr) \ + { \ + __m256i a = _mm256_loadu_si256((const __m256i*)ptr); \ + return _Tpwvec(intrin(a)); \ + } + +OPENCV_HAL_IMPL_AVX512_EXPAND(v_uint8x64, v_uint16x32, uchar, _mm512_cvtepu8_epi16) +OPENCV_HAL_IMPL_AVX512_EXPAND(v_int8x64, v_int16x32, schar, _mm512_cvtepi8_epi16) +OPENCV_HAL_IMPL_AVX512_EXPAND(v_uint16x32, v_uint32x16, ushort, _mm512_cvtepu16_epi32) +OPENCV_HAL_IMPL_AVX512_EXPAND(v_int16x32, v_int32x16, short, _mm512_cvtepi16_epi32) +OPENCV_HAL_IMPL_AVX512_EXPAND(v_uint32x16, v_uint64x8, unsigned, _mm512_cvtepu32_epi64) +OPENCV_HAL_IMPL_AVX512_EXPAND(v_int32x16, v_int64x8, int, _mm512_cvtepi32_epi64) + +#define OPENCV_HAL_IMPL_AVX512_EXPAND_Q(_Tpvec, _Tp, intrin) \ + inline _Tpvec v512_load_expand_q(const _Tp* ptr) \ + { \ + __m128i a = _mm_loadu_si128((const __m128i*)ptr); \ + return _Tpvec(intrin(a)); \ + } + +OPENCV_HAL_IMPL_AVX512_EXPAND_Q(v_uint32x16, uchar, _mm512_cvtepu8_epi32) +OPENCV_HAL_IMPL_AVX512_EXPAND_Q(v_int32x16, schar, _mm512_cvtepi8_epi32) + +/* pack */ +// 16 +inline v_int8x64 v_pack(const v_int16x32& a, const v_int16x32& b) +{ return v_int8x64(_mm512_permutexvar_epi64(_v512_set_epu64(7, 5, 3, 1, 6, 4, 2, 0), _mm512_packs_epi16(a.val, b.val))); } + +inline v_uint8x64 v_pack(const v_uint16x32& a, const v_uint16x32& b) +{ + const __m512i t = _mm512_set1_epi16(255); + return v_uint8x64(_v512_combine(_mm512_cvtepi16_epi8(_mm512_min_epu16(a.val, t)), _mm512_cvtepi16_epi8(_mm512_min_epu16(b.val, t)))); +} + +inline v_uint8x64 v_pack_u(const v_int16x32& a, const v_int16x32& b) +{ + return v_uint8x64(_mm512_permutexvar_epi64(_v512_set_epu64(7, 5, 3, 1, 6, 4, 2, 0), _mm512_packus_epi16(a.val, b.val))); +} + +inline void v_pack_store(schar* ptr, const v_int16x32& a) +{ v_store_low(ptr, v_pack(a, a)); } + +inline void v_pack_store(uchar* ptr, const v_uint16x32& a) +{ + const __m512i m = _mm512_set1_epi16(255); + _mm256_storeu_si256((__m256i*)ptr, _mm512_cvtepi16_epi8(_mm512_min_epu16(a.val, m))); +} + +inline void v_pack_u_store(uchar* ptr, const v_int16x32& a) +{ v_store_low(ptr, v_pack_u(a, a)); } + +template inline +v_uint8x64 v_rshr_pack(const v_uint16x32& a, const v_uint16x32& b) +{ + // we assume that n > 0, and so the shifted 16-bit values can be treated as signed numbers. + v_uint16x32 delta = v512_setall_u16((short)(1 << (n-1))); + return v_pack_u(v_reinterpret_as_s16((a + delta) >> n), + v_reinterpret_as_s16((b + delta) >> n)); +} + +template inline +void v_rshr_pack_store(uchar* ptr, const v_uint16x32& a) +{ + v_uint16x32 delta = v512_setall_u16((short)(1 << (n-1))); + v_pack_u_store(ptr, v_reinterpret_as_s16((a + delta) >> n)); +} + +template inline +v_uint8x64 v_rshr_pack_u(const v_int16x32& a, const v_int16x32& b) +{ + v_int16x32 delta = v512_setall_s16((short)(1 << (n-1))); + return v_pack_u((a + delta) >> n, (b + delta) >> n); +} + +template inline +void v_rshr_pack_u_store(uchar* ptr, const v_int16x32& a) +{ + v_int16x32 delta = v512_setall_s16((short)(1 << (n-1))); + v_pack_u_store(ptr, (a + delta) >> n); +} + +template inline +v_int8x64 v_rshr_pack(const v_int16x32& a, const v_int16x32& b) +{ + v_int16x32 delta = v512_setall_s16((short)(1 << (n-1))); + return v_pack((a + delta) >> n, (b + delta) >> n); +} + +template inline +void v_rshr_pack_store(schar* ptr, const v_int16x32& a) +{ + v_int16x32 delta = v512_setall_s16((short)(1 << (n-1))); + v_pack_store(ptr, (a + delta) >> n); +} + +// 32 +inline v_int16x32 v_pack(const v_int32x16& a, const v_int32x16& b) +{ return v_int16x32(_mm512_permutexvar_epi64(_v512_set_epu64(7, 5, 3, 1, 6, 4, 2, 0), _mm512_packs_epi32(a.val, b.val))); } + +inline v_uint16x32 v_pack(const v_uint32x16& a, const v_uint32x16& b) +{ + const __m512i m = _mm512_set1_epi32(65535); + return v_uint16x32(_v512_combine(_mm512_cvtepi32_epi16(_mm512_min_epu32(a.val, m)), _mm512_cvtepi32_epi16(_mm512_min_epu32(b.val, m)))); +} + +inline v_uint16x32 v_pack_u(const v_int32x16& a, const v_int32x16& b) +{ return v_uint16x32(_mm512_permutexvar_epi64(_v512_set_epu64(7, 5, 3, 1, 6, 4, 2, 0), _mm512_packus_epi32(a.val, b.val))); } + +inline void v_pack_store(short* ptr, const v_int32x16& a) +{ v_store_low(ptr, v_pack(a, a)); } + +inline void v_pack_store(ushort* ptr, const v_uint32x16& a) +{ + const __m512i m = _mm512_set1_epi32(65535); + _mm256_storeu_si256((__m256i*)ptr, _mm512_cvtepi32_epi16(_mm512_min_epu32(a.val, m))); +} + +inline void v_pack_u_store(ushort* ptr, const v_int32x16& a) +{ v_store_low(ptr, v_pack_u(a, a)); } + + +template inline +v_uint16x32 v_rshr_pack(const v_uint32x16& a, const v_uint32x16& b) +{ + v_uint32x16 delta = v512_setall_u32(1 << (n-1)); + return v_pack_u(v_reinterpret_as_s32((a + delta) >> n), + v_reinterpret_as_s32((b + delta) >> n)); +} + +template inline +void v_rshr_pack_store(ushort* ptr, const v_uint32x16& a) +{ + v_uint32x16 delta = v512_setall_u32(1 << (n-1)); + v_pack_u_store(ptr, v_reinterpret_as_s32((a + delta) >> n)); +} + +template inline +v_uint16x32 v_rshr_pack_u(const v_int32x16& a, const v_int32x16& b) +{ + v_int32x16 delta = v512_setall_s32(1 << (n-1)); + return v_pack_u((a + delta) >> n, (b + delta) >> n); +} + +template inline +void v_rshr_pack_u_store(ushort* ptr, const v_int32x16& a) +{ + v_int32x16 delta = v512_setall_s32(1 << (n-1)); + v_pack_u_store(ptr, (a + delta) >> n); +} + +template inline +v_int16x32 v_rshr_pack(const v_int32x16& a, const v_int32x16& b) +{ + v_int32x16 delta = v512_setall_s32(1 << (n-1)); + return v_pack((a + delta) >> n, (b + delta) >> n); +} + +template inline +void v_rshr_pack_store(short* ptr, const v_int32x16& a) +{ + v_int32x16 delta = v512_setall_s32(1 << (n-1)); + v_pack_store(ptr, (a + delta) >> n); +} + +// 64 +// Non-saturating pack +inline v_uint32x16 v_pack(const v_uint64x8& a, const v_uint64x8& b) +{ return v_uint32x16(_v512_combine(_mm512_cvtepi64_epi32(a.val), _mm512_cvtepi64_epi32(b.val))); } + +inline v_int32x16 v_pack(const v_int64x8& a, const v_int64x8& b) +{ return v_reinterpret_as_s32(v_pack(v_reinterpret_as_u64(a), v_reinterpret_as_u64(b))); } + +inline void v_pack_store(unsigned* ptr, const v_uint64x8& a) +{ _mm256_storeu_si256((__m256i*)ptr, _mm512_cvtepi64_epi32(a.val)); } + +inline void v_pack_store(int* ptr, const v_int64x8& b) +{ v_pack_store((unsigned*)ptr, v_reinterpret_as_u64(b)); } + +template inline +v_uint32x16 v_rshr_pack(const v_uint64x8& a, const v_uint64x8& b) +{ + v_uint64x8 delta = v512_setall_u64((uint64)1 << (n-1)); + return v_pack((a + delta) >> n, (b + delta) >> n); +} + +template inline +void v_rshr_pack_store(unsigned* ptr, const v_uint64x8& a) +{ + v_uint64x8 delta = v512_setall_u64((uint64)1 << (n-1)); + v_pack_store(ptr, (a + delta) >> n); +} + +template inline +v_int32x16 v_rshr_pack(const v_int64x8& a, const v_int64x8& b) +{ + v_int64x8 delta = v512_setall_s64((int64)1 << (n-1)); + return v_pack((a + delta) >> n, (b + delta) >> n); +} + +template inline +void v_rshr_pack_store(int* ptr, const v_int64x8& a) +{ + v_int64x8 delta = v512_setall_s64((int64)1 << (n-1)); + v_pack_store(ptr, (a + delta) >> n); +} + +// pack boolean +inline v_uint8x64 v_pack_b(const v_uint16x32& a, const v_uint16x32& b) +{ return v_uint8x64(_mm512_permutexvar_epi64(_v512_set_epu64(7, 5, 3, 1, 6, 4, 2, 0), _mm512_packs_epi16(a.val, b.val))); } + +inline v_uint8x64 v_pack_b(const v_uint32x16& a, const v_uint32x16& b, + const v_uint32x16& c, const v_uint32x16& d) +{ + __m512i ab = _mm512_packs_epi32(a.val, b.val); + __m512i cd = _mm512_packs_epi32(c.val, d.val); + + return v_uint8x64(_mm512_permutexvar_epi32(_v512_set_epu32(15, 11, 7, 3, 14, 10, 6, 2, 13, 9, 5, 1, 12, 8, 4, 0), _mm512_packs_epi16(ab, cd))); +} + +inline v_uint8x64 v_pack_b(const v_uint64x8& a, const v_uint64x8& b, const v_uint64x8& c, + const v_uint64x8& d, const v_uint64x8& e, const v_uint64x8& f, + const v_uint64x8& g, const v_uint64x8& h) +{ + __m512i ab = _mm512_packs_epi32(a.val, b.val); + __m512i cd = _mm512_packs_epi32(c.val, d.val); + __m512i ef = _mm512_packs_epi32(e.val, f.val); + __m512i gh = _mm512_packs_epi32(g.val, h.val); + + __m512i abcd = _mm512_packs_epi32(ab, cd); + __m512i efgh = _mm512_packs_epi32(ef, gh); + + return v_uint8x64(_mm512_permutexvar_epi16(_v512_set_epu16(31, 23, 15, 7, 30, 22, 14, 6, 29, 21, 13, 5, 28, 20, 12, 4, + 27, 19, 11, 3, 26, 18, 10, 2, 25, 17, 9, 1, 24, 16, 8, 0), _mm512_packs_epi16(abcd, efgh))); +} + +/* Recombine */ +// its up there with load and store operations + +/* Extract */ +#define OPENCV_HAL_IMPL_AVX512_EXTRACT(_Tpvec) \ + template \ + inline _Tpvec v_extract(const _Tpvec& a, const _Tpvec& b) \ + { return v_rotate_right(a, b); } + +OPENCV_HAL_IMPL_AVX512_EXTRACT(v_uint8x64) +OPENCV_HAL_IMPL_AVX512_EXTRACT(v_int8x64) +OPENCV_HAL_IMPL_AVX512_EXTRACT(v_uint16x32) +OPENCV_HAL_IMPL_AVX512_EXTRACT(v_int16x32) +OPENCV_HAL_IMPL_AVX512_EXTRACT(v_uint32x16) +OPENCV_HAL_IMPL_AVX512_EXTRACT(v_int32x16) +OPENCV_HAL_IMPL_AVX512_EXTRACT(v_uint64x8) +OPENCV_HAL_IMPL_AVX512_EXTRACT(v_int64x8) +OPENCV_HAL_IMPL_AVX512_EXTRACT(v_float32x16) +OPENCV_HAL_IMPL_AVX512_EXTRACT(v_float64x8) + +#define OPENCV_HAL_IMPL_AVX512_EXTRACT_N(_Tpvec, _Tp) \ +template inline _Tp v_extract_n(_Tpvec v) { return v_rotate_right(v).get0(); } + +OPENCV_HAL_IMPL_AVX512_EXTRACT_N(v_uint8x64, uchar) +OPENCV_HAL_IMPL_AVX512_EXTRACT_N(v_int8x64, schar) +OPENCV_HAL_IMPL_AVX512_EXTRACT_N(v_uint16x32, ushort) +OPENCV_HAL_IMPL_AVX512_EXTRACT_N(v_int16x32, short) +OPENCV_HAL_IMPL_AVX512_EXTRACT_N(v_uint32x16, uint) +OPENCV_HAL_IMPL_AVX512_EXTRACT_N(v_int32x16, int) +OPENCV_HAL_IMPL_AVX512_EXTRACT_N(v_uint64x8, uint64) +OPENCV_HAL_IMPL_AVX512_EXTRACT_N(v_int64x8, int64) +OPENCV_HAL_IMPL_AVX512_EXTRACT_N(v_float32x16, float) +OPENCV_HAL_IMPL_AVX512_EXTRACT_N(v_float64x8, double) + +template +inline v_uint32x16 v_broadcast_element(v_uint32x16 a) +{ + static const __m512i perm = _mm512_set1_epi32((char)i); + return v_uint32x16(_mm512_permutexvar_epi32(perm, a.val)); +} + +template +inline v_int32x16 v_broadcast_element(const v_int32x16 &a) +{ return v_reinterpret_as_s32(v_broadcast_element(v_reinterpret_as_u32(a))); } + +template +inline v_float32x16 v_broadcast_element(const v_float32x16 &a) +{ return v_reinterpret_as_f32(v_broadcast_element(v_reinterpret_as_u32(a))); } + + +///////////////////// load deinterleave ///////////////////////////// + +inline void v_load_deinterleave( const uchar* ptr, v_uint8x64& a, v_uint8x64& b ) +{ + __m512i ab0 = _mm512_loadu_si512((const __m512i*)ptr); + __m512i ab1 = _mm512_loadu_si512((const __m512i*)(ptr + 64)); +#if CV_AVX_512VBMI + __m512i mask0 = _v512_set_epu8(126, 124, 122, 120, 118, 116, 114, 112, 110, 108, 106, 104, 102, 100, 98, 96, + 94, 92, 90, 88, 86, 84, 82, 80, 78, 76, 74, 72, 70, 68, 66, 64, + 62, 60, 58, 56, 54, 52, 50, 48, 46, 44, 42, 40, 38, 36, 34, 32, + 30, 28, 26, 24, 22, 20, 18, 16, 14, 12, 10, 8, 6, 4, 2, 0); + __m512i mask1 = _v512_set_epu8(127, 125, 123, 121, 119, 117, 115, 113, 111, 109, 107, 105, 103, 101, 99, 97, + 95, 93, 91, 89, 87, 85, 83, 81, 79, 77, 75, 73, 71, 69, 67, 65, + 63, 61, 59, 57, 55, 53, 51, 49, 47, 45, 43, 41, 39, 37, 35, 33, + 31, 29, 27, 25, 23, 21, 19, 17, 15, 13, 11, 9, 7, 5, 3, 1); + a = v_uint8x64(_mm512_permutex2var_epi8(ab0, mask0, ab1)); + b = v_uint8x64(_mm512_permutex2var_epi8(ab0, mask1, ab1)); +#else + __m512i mask0 = _mm512_set4_epi32(0x0f0d0b09, 0x07050301, 0x0e0c0a08, 0x06040200); + __m512i a0b0 = _mm512_shuffle_epi8(ab0, mask0); + __m512i a1b1 = _mm512_shuffle_epi8(ab1, mask0); + __m512i mask1 = _v512_set_epu64(14, 12, 10, 8, 6, 4, 2, 0); + __m512i mask2 = _v512_set_epu64(15, 13, 11, 9, 7, 5, 3, 1); + a = v_uint8x64(_mm512_permutex2var_epi64(a0b0, mask1, a1b1)); + b = v_uint8x64(_mm512_permutex2var_epi64(a0b0, mask2, a1b1)); +#endif +} + +inline void v_load_deinterleave( const ushort* ptr, v_uint16x32& a, v_uint16x32& b ) +{ + __m512i ab0 = _mm512_loadu_si512((const __m512i*)ptr); + __m512i ab1 = _mm512_loadu_si512((const __m512i*)(ptr + 32)); + __m512i mask0 = _v512_set_epu16(62, 60, 58, 56, 54, 52, 50, 48, 46, 44, 42, 40, 38, 36, 34, 32, + 30, 28, 26, 24, 22, 20, 18, 16, 14, 12, 10, 8, 6, 4, 2, 0); + __m512i mask1 = _v512_set_epu16(63, 61, 59, 57, 55, 53, 51, 49, 47, 45, 43, 41, 39, 37, 35, 33, + 31, 29, 27, 25, 23, 21, 19, 17, 15, 13, 11, 9, 7, 5, 3, 1); + a = v_uint16x32(_mm512_permutex2var_epi16(ab0, mask0, ab1)); + b = v_uint16x32(_mm512_permutex2var_epi16(ab0, mask1, ab1)); +} + +inline void v_load_deinterleave( const unsigned* ptr, v_uint32x16& a, v_uint32x16& b ) +{ + __m512i ab0 = _mm512_loadu_si512((const __m512i*)ptr); + __m512i ab1 = _mm512_loadu_si512((const __m512i*)(ptr + 16)); + __m512i mask0 = _v512_set_epu32(30, 28, 26, 24, 22, 20, 18, 16, 14, 12, 10, 8, 6, 4, 2, 0); + __m512i mask1 = _v512_set_epu32(31, 29, 27, 25, 23, 21, 19, 17, 15, 13, 11, 9, 7, 5, 3, 1); + a = v_uint32x16(_mm512_permutex2var_epi32(ab0, mask0, ab1)); + b = v_uint32x16(_mm512_permutex2var_epi32(ab0, mask1, ab1)); +} + +inline void v_load_deinterleave( const uint64* ptr, v_uint64x8& a, v_uint64x8& b ) +{ + __m512i ab0 = _mm512_loadu_si512((const __m512i*)ptr); + __m512i ab1 = _mm512_loadu_si512((const __m512i*)(ptr + 8)); + __m512i mask0 = _v512_set_epu64(14, 12, 10, 8, 6, 4, 2, 0); + __m512i mask1 = _v512_set_epu64(15, 13, 11, 9, 7, 5, 3, 1); + a = v_uint64x8(_mm512_permutex2var_epi64(ab0, mask0, ab1)); + b = v_uint64x8(_mm512_permutex2var_epi64(ab0, mask1, ab1)); +} + +inline void v_load_deinterleave( const uchar* ptr, v_uint8x64& a, v_uint8x64& b, v_uint8x64& c ) +{ + __m512i bgr0 = _mm512_loadu_si512((const __m512i*)ptr); + __m512i bgr1 = _mm512_loadu_si512((const __m512i*)(ptr + 64)); + __m512i bgr2 = _mm512_loadu_si512((const __m512i*)(ptr + 128)); + +#if CV_AVX_512VBMI2 + __m512i mask0 = _v512_set_epu8(126, 123, 120, 117, 114, 111, 108, 105, 102, 99, 96, 93, 90, 87, 84, 81, + 78, 75, 72, 69, 66, 63, 60, 57, 54, 51, 48, 45, 42, 39, 36, 33, + 30, 27, 24, 21, 18, 15, 12, 9, 6, 3, 0, 62, 59, 56, 53, 50, + 47, 44, 41, 38, 35, 32, 29, 26, 23, 20, 17, 14, 11, 8, 5, 2); + __m512i r0b01 = _mm512_permutex2var_epi8(bgr0, mask0, bgr1); + __m512i b1g12 = _mm512_permutex2var_epi8(bgr1, mask0, bgr2); + __m512i r12b2 = _mm512_permutex2var_epi8(bgr1, + _v512_set_epu8(125, 122, 119, 116, 113, 110, 107, 104, 101, 98, 95, 92, 89, 86, 83, 80, + 77, 74, 71, 68, 65, 127, 124, 121, 118, 115, 112, 109, 106, 103, 100, 97, + 94, 91, 88, 85, 82, 79, 76, 73, 70, 67, 64, 61, 58, 55, 52, 49, + 46, 43, 40, 37, 34, 31, 28, 25, 22, 19, 16, 13, 10, 7, 4, 1), bgr2); + a = v_uint8x64(_mm512_mask_compress_epi8(r12b2, 0xffffffffffe00000, r0b01)); + b = v_uint8x64(_mm512_mask_compress_epi8(b1g12, 0x2492492492492492, bgr0)); + c = v_uint8x64(_mm512_mask_expand_epi8(r0b01, 0xffffffffffe00000, r12b2)); +#elif CV_AVX_512VBMI + __m512i b0g0b1 = _mm512_mask_blend_epi8(0xb6db6db6db6db6db, bgr1, bgr0); + __m512i g1r1g2 = _mm512_mask_blend_epi8(0xb6db6db6db6db6db, bgr2, bgr1); + __m512i r2b2r0 = _mm512_mask_blend_epi8(0xb6db6db6db6db6db, bgr0, bgr2); + a = v_uint8x64(_mm512_permutex2var_epi8(b0g0b1, _v512_set_epu8(125, 122, 119, 116, 113, 110, 107, 104, 101, 98, 95, 92, 89, 86, 83, 80, + 77, 74, 71, 68, 65, 63, 61, 60, 58, 57, 55, 54, 52, 51, 49, 48, + 46, 45, 43, 42, 40, 39, 37, 36, 34, 33, 31, 30, 28, 27, 25, 24, + 23, 21, 20, 18, 17, 15, 14, 12, 11, 9, 8, 6, 5, 3, 2, 0), bgr2)); + b = v_uint8x64(_mm512_permutex2var_epi8(g1r1g2, _v512_set_epu8( 63, 61, 60, 58, 57, 55, 54, 52, 51, 49, 48, 46, 45, 43, 42, 40, + 39, 37, 36, 34, 33, 31, 30, 28, 27, 25, 24, 23, 21, 20, 18, 17, + 15, 14, 12, 11, 9, 8, 6, 5, 3, 2, 0, 126, 123, 120, 117, 114, + 111, 108, 105, 102, 99, 96, 93, 90, 87, 84, 81, 78, 75, 72, 69, 66), bgr0)); + c = v_uint8x64(_mm512_permutex2var_epi8(r2b2r0, _v512_set_epu8( 63, 60, 57, 54, 51, 48, 45, 42, 39, 36, 33, 30, 27, 24, 21, 18, + 15, 12, 9, 6, 3, 0, 125, 122, 119, 116, 113, 110, 107, 104, 101, 98, + 95, 92, 89, 86, 83, 80, 77, 74, 71, 68, 65, 62, 59, 56, 53, 50, + 47, 44, 41, 38, 35, 32, 29, 26, 23, 20, 17, 14, 11, 8, 5, 2), bgr1)); +#else + __m512i mask0 = _v512_set_epu16(61, 58, 55, 52, 49, 46, 43, 40, 37, 34, 63, 60, 57, 54, 51, 48, + 45, 42, 39, 36, 33, 30, 27, 24, 21, 18, 15, 12, 9, 6, 3, 0); + __m512i b01g1 = _mm512_permutex2var_epi16(bgr0, mask0, bgr1); + __m512i r12b2 = _mm512_permutex2var_epi16(bgr1, mask0, bgr2); + __m512i g20r0 = _mm512_permutex2var_epi16(bgr2, mask0, bgr0); + + __m512i b0g0 = _mm512_mask_blend_epi32(0xf800, b01g1, r12b2); + __m512i r0b1 = _mm512_permutex2var_epi16(bgr1, _v512_set_epu16(42, 41, 40, 39, 38, 37, 36, 35, 34, 33, 32, 29, 26, 23, 20, 17, + 14, 11, 8, 5, 2, 53, 52, 51, 50, 49, 48, 47, 46, 45, 44, 43), g20r0); + __m512i g1r1 = _mm512_alignr_epi32(r12b2, g20r0, 11); + a = v_uint8x64(_mm512_mask_blend_epi8(0xAAAAAAAAAAAAAAAA, b0g0, r0b1)); + c = v_uint8x64(_mm512_mask_blend_epi8(0xAAAAAAAAAAAAAAAA, r0b1, g1r1)); + b = v_uint8x64(_mm512_shuffle_epi8(_mm512_mask_blend_epi8(0xAAAAAAAAAAAAAAAA, g1r1, b0g0), _mm512_set4_epi32(0x0e0f0c0d, 0x0a0b0809, 0x06070405, 0x02030001))); +#endif +} + +inline void v_load_deinterleave( const ushort* ptr, v_uint16x32& a, v_uint16x32& b, v_uint16x32& c ) +{ + __m512i bgr0 = _mm512_loadu_si512((const __m512i*)ptr); + __m512i bgr1 = _mm512_loadu_si512((const __m512i*)(ptr + 32)); + __m512i bgr2 = _mm512_loadu_si512((const __m512i*)(ptr + 64)); + + __m512i mask0 = _v512_set_epu16(61, 58, 55, 52, 49, 46, 43, 40, 37, 34, 63, 60, 57, 54, 51, 48, + 45, 42, 39, 36, 33, 30, 27, 24, 21, 18, 15, 12, 9, 6, 3, 0); + __m512i b01g1 = _mm512_permutex2var_epi16(bgr0, mask0, bgr1); + __m512i r12b2 = _mm512_permutex2var_epi16(bgr1, mask0, bgr2); + __m512i g20r0 = _mm512_permutex2var_epi16(bgr2, mask0, bgr0); + + a = v_uint16x32(_mm512_mask_blend_epi32(0xf800, b01g1, r12b2)); + b = v_uint16x32(_mm512_permutex2var_epi16(bgr1, _v512_set_epu16(42, 41, 40, 39, 38, 37, 36, 35, 34, 33, 32, 29, 26, 23, 20, 17, + 14, 11, 8, 5, 2, 53, 52, 51, 50, 49, 48, 47, 46, 45, 44, 43), g20r0)); + c = v_uint16x32(_mm512_alignr_epi32(r12b2, g20r0, 11)); +} + +inline void v_load_deinterleave( const unsigned* ptr, v_uint32x16& a, v_uint32x16& b, v_uint32x16& c ) +{ + __m512i bgr0 = _mm512_loadu_si512((const __m512i*)ptr); + __m512i bgr1 = _mm512_loadu_si512((const __m512i*)(ptr + 16)); + __m512i bgr2 = _mm512_loadu_si512((const __m512i*)(ptr + 32)); + + __m512i mask0 = _v512_set_epu32(29, 26, 23, 20, 17, 30, 27, 24, 21, 18, 15, 12, 9, 6, 3, 0); + __m512i b01r1 = _mm512_permutex2var_epi32(bgr0, mask0, bgr1); + __m512i g12b2 = _mm512_permutex2var_epi32(bgr1, mask0, bgr2); + __m512i r20g0 = _mm512_permutex2var_epi32(bgr2, mask0, bgr0); + + a = v_uint32x16(_mm512_mask_blend_epi32(0xf800, b01r1, g12b2)); + b = v_uint32x16(_mm512_alignr_epi32(g12b2, r20g0, 11)); + c = v_uint32x16(_mm512_permutex2var_epi32(bgr1, _v512_set_epu32(21, 20, 19, 18, 17, 16, 13, 10, 7, 4, 1, 26, 25, 24, 23, 22), r20g0)); +} + +inline void v_load_deinterleave( const uint64* ptr, v_uint64x8& a, v_uint64x8& b, v_uint64x8& c ) +{ + __m512i bgr0 = _mm512_loadu_si512((const __m512i*)ptr); + __m512i bgr1 = _mm512_loadu_si512((const __m512i*)(ptr + 8)); + __m512i bgr2 = _mm512_loadu_si512((const __m512i*)(ptr + 16)); + + __m512i mask0 = _v512_set_epu64(13, 10, 15, 12, 9, 6, 3, 0); + __m512i b01g1 = _mm512_permutex2var_epi64(bgr0, mask0, bgr1); + __m512i r12b2 = _mm512_permutex2var_epi64(bgr1, mask0, bgr2); + __m512i g20r0 = _mm512_permutex2var_epi64(bgr2, mask0, bgr0); + + a = v_uint64x8(_mm512_mask_blend_epi64(0xc0, b01g1, r12b2)); + c = v_uint64x8(_mm512_alignr_epi64(r12b2, g20r0, 6)); + b = v_uint64x8(_mm512_permutex2var_epi64(bgr1, _v512_set_epu64(10, 9, 8, 5, 2, 13, 12, 11), g20r0)); +} + +inline void v_load_deinterleave( const uchar* ptr, v_uint8x64& a, v_uint8x64& b, v_uint8x64& c, v_uint8x64& d ) +{ + __m512i bgra0 = _mm512_loadu_si512((const __m512i*)ptr); + __m512i bgra1 = _mm512_loadu_si512((const __m512i*)(ptr + 64)); + __m512i bgra2 = _mm512_loadu_si512((const __m512i*)(ptr + 128)); + __m512i bgra3 = _mm512_loadu_si512((const __m512i*)(ptr + 192)); + +#if CV_AVX_512VBMI + __m512i mask0 = _v512_set_epu8(126, 124, 122, 120, 118, 116, 114, 112, 110, 108, 106, 104, 102, 100, 98, 96, + 94, 92, 90, 88, 86, 84, 82, 80, 78, 76, 74, 72, 70, 68, 66, 64, + 62, 60, 58, 56, 54, 52, 50, 48, 46, 44, 42, 40, 38, 36, 34, 32, + 30, 28, 26, 24, 22, 20, 18, 16, 14, 12, 10, 8, 6, 4, 2, 0); + __m512i mask1 = _v512_set_epu8(127, 125, 123, 121, 119, 117, 115, 113, 111, 109, 107, 105, 103, 101, 99, 97, + 95, 93, 91, 89, 87, 85, 83, 81, 79, 77, 75, 73, 71, 69, 67, 65, + 63, 61, 59, 57, 55, 53, 51, 49, 47, 45, 43, 41, 39, 37, 35, 33, + 31, 29, 27, 25, 23, 21, 19, 17, 15, 13, 11, 9, 7, 5, 3, 1); + + __m512i br01 = _mm512_permutex2var_epi8(bgra0, mask0, bgra1); + __m512i ga01 = _mm512_permutex2var_epi8(bgra0, mask1, bgra1); + __m512i br23 = _mm512_permutex2var_epi8(bgra2, mask0, bgra3); + __m512i ga23 = _mm512_permutex2var_epi8(bgra2, mask1, bgra3); + + a = v_uint8x64(_mm512_permutex2var_epi8(br01, mask0, br23)); + c = v_uint8x64(_mm512_permutex2var_epi8(br01, mask1, br23)); + b = v_uint8x64(_mm512_permutex2var_epi8(ga01, mask0, ga23)); + d = v_uint8x64(_mm512_permutex2var_epi8(ga01, mask1, ga23)); +#else + __m512i mask = _mm512_set4_epi32(0x0f0b0703, 0x0e0a0602, 0x0d090501, 0x0c080400); + __m512i b0g0r0a0 = _mm512_shuffle_epi8(bgra0, mask); + __m512i b1g1r1a1 = _mm512_shuffle_epi8(bgra1, mask); + __m512i b2g2r2a2 = _mm512_shuffle_epi8(bgra2, mask); + __m512i b3g3r3a3 = _mm512_shuffle_epi8(bgra3, mask); + + __m512i mask0 = _v512_set_epu32(30, 28, 26, 24, 22, 20, 18, 16, 14, 12, 10, 8, 6, 4, 2, 0); + __m512i mask1 = _v512_set_epu32(31, 29, 27, 25, 23, 21, 19, 17, 15, 13, 11, 9, 7, 5, 3, 1); + + __m512i br01 = _mm512_permutex2var_epi32(b0g0r0a0, mask0, b1g1r1a1); + __m512i ga01 = _mm512_permutex2var_epi32(b0g0r0a0, mask1, b1g1r1a1); + __m512i br23 = _mm512_permutex2var_epi32(b2g2r2a2, mask0, b3g3r3a3); + __m512i ga23 = _mm512_permutex2var_epi32(b2g2r2a2, mask1, b3g3r3a3); + + a = v_uint8x64(_mm512_permutex2var_epi32(br01, mask0, br23)); + c = v_uint8x64(_mm512_permutex2var_epi32(br01, mask1, br23)); + b = v_uint8x64(_mm512_permutex2var_epi32(ga01, mask0, ga23)); + d = v_uint8x64(_mm512_permutex2var_epi32(ga01, mask1, ga23)); +#endif +} + +inline void v_load_deinterleave( const ushort* ptr, v_uint16x32& a, v_uint16x32& b, v_uint16x32& c, v_uint16x32& d ) +{ + __m512i bgra0 = _mm512_loadu_si512((const __m512i*)ptr); + __m512i bgra1 = _mm512_loadu_si512((const __m512i*)(ptr + 32)); + __m512i bgra2 = _mm512_loadu_si512((const __m512i*)(ptr + 64)); + __m512i bgra3 = _mm512_loadu_si512((const __m512i*)(ptr + 96)); + + __m512i mask0 = _v512_set_epu16(62, 60, 58, 56, 54, 52, 50, 48, 46, 44, 42, 40, 38, 36, 34, 32, + 30, 28, 26, 24, 22, 20, 18, 16, 14, 12, 10, 8, 6, 4, 2, 0); + __m512i mask1 = _v512_set_epu16(63, 61, 59, 57, 55, 53, 51, 49, 47, 45, 43, 41, 39, 37, 35, 33, + 31, 29, 27, 25, 23, 21, 19, 17, 15, 13, 11, 9, 7, 5, 3, 1); + + __m512i br01 = _mm512_permutex2var_epi16(bgra0, mask0, bgra1); + __m512i ga01 = _mm512_permutex2var_epi16(bgra0, mask1, bgra1); + __m512i br23 = _mm512_permutex2var_epi16(bgra2, mask0, bgra3); + __m512i ga23 = _mm512_permutex2var_epi16(bgra2, mask1, bgra3); + + a = v_uint16x32(_mm512_permutex2var_epi16(br01, mask0, br23)); + c = v_uint16x32(_mm512_permutex2var_epi16(br01, mask1, br23)); + b = v_uint16x32(_mm512_permutex2var_epi16(ga01, mask0, ga23)); + d = v_uint16x32(_mm512_permutex2var_epi16(ga01, mask1, ga23)); +} + +inline void v_load_deinterleave( const unsigned* ptr, v_uint32x16& a, v_uint32x16& b, v_uint32x16& c, v_uint32x16& d ) +{ + __m512i bgra0 = _mm512_loadu_si512((const __m512i*)ptr); + __m512i bgra1 = _mm512_loadu_si512((const __m512i*)(ptr + 16)); + __m512i bgra2 = _mm512_loadu_si512((const __m512i*)(ptr + 32)); + __m512i bgra3 = _mm512_loadu_si512((const __m512i*)(ptr + 48)); + + __m512i mask0 = _v512_set_epu32(30, 28, 26, 24, 22, 20, 18, 16, 14, 12, 10, 8, 6, 4, 2, 0); + __m512i mask1 = _v512_set_epu32(31, 29, 27, 25, 23, 21, 19, 17, 15, 13, 11, 9, 7, 5, 3, 1); + + __m512i br01 = _mm512_permutex2var_epi32(bgra0, mask0, bgra1); + __m512i ga01 = _mm512_permutex2var_epi32(bgra0, mask1, bgra1); + __m512i br23 = _mm512_permutex2var_epi32(bgra2, mask0, bgra3); + __m512i ga23 = _mm512_permutex2var_epi32(bgra2, mask1, bgra3); + + a = v_uint32x16(_mm512_permutex2var_epi32(br01, mask0, br23)); + c = v_uint32x16(_mm512_permutex2var_epi32(br01, mask1, br23)); + b = v_uint32x16(_mm512_permutex2var_epi32(ga01, mask0, ga23)); + d = v_uint32x16(_mm512_permutex2var_epi32(ga01, mask1, ga23)); +} + +inline void v_load_deinterleave( const uint64* ptr, v_uint64x8& a, v_uint64x8& b, v_uint64x8& c, v_uint64x8& d ) +{ + __m512i bgra0 = _mm512_loadu_si512((const __m512i*)ptr); + __m512i bgra1 = _mm512_loadu_si512((const __m512i*)(ptr + 8)); + __m512i bgra2 = _mm512_loadu_si512((const __m512i*)(ptr + 16)); + __m512i bgra3 = _mm512_loadu_si512((const __m512i*)(ptr + 24)); + + __m512i mask0 = _v512_set_epu64(14, 12, 10, 8, 6, 4, 2, 0); + __m512i mask1 = _v512_set_epu64(15, 13, 11, 9, 7, 5, 3, 1); + + __m512i br01 = _mm512_permutex2var_epi64(bgra0, mask0, bgra1); + __m512i ga01 = _mm512_permutex2var_epi64(bgra0, mask1, bgra1); + __m512i br23 = _mm512_permutex2var_epi64(bgra2, mask0, bgra3); + __m512i ga23 = _mm512_permutex2var_epi64(bgra2, mask1, bgra3); + + a = v_uint64x8(_mm512_permutex2var_epi64(br01, mask0, br23)); + c = v_uint64x8(_mm512_permutex2var_epi64(br01, mask1, br23)); + b = v_uint64x8(_mm512_permutex2var_epi64(ga01, mask0, ga23)); + d = v_uint64x8(_mm512_permutex2var_epi64(ga01, mask1, ga23)); +} + +///////////////////////////// store interleave ///////////////////////////////////// + +inline void v_store_interleave( uchar* ptr, const v_uint8x64& x, const v_uint8x64& y, + hal::StoreMode mode=hal::STORE_UNALIGNED ) +{ + v_uint8x64 low, high; + v_zip(x, y, low, high); + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm512_stream_si512((__m512i*)ptr, low.val); + _mm512_stream_si512((__m512i*)(ptr + 64), high.val); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm512_store_si512((__m512i*)ptr, low.val); + _mm512_store_si512((__m512i*)(ptr + 64), high.val); + } + else + { + _mm512_storeu_si512((__m512i*)ptr, low.val); + _mm512_storeu_si512((__m512i*)(ptr + 64), high.val); + } +} + +inline void v_store_interleave( ushort* ptr, const v_uint16x32& x, const v_uint16x32& y, + hal::StoreMode mode=hal::STORE_UNALIGNED ) +{ + v_uint16x32 low, high; + v_zip(x, y, low, high); + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm512_stream_si512((__m512i*)ptr, low.val); + _mm512_stream_si512((__m512i*)(ptr + 32), high.val); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm512_store_si512((__m512i*)ptr, low.val); + _mm512_store_si512((__m512i*)(ptr + 32), high.val); + } + else + { + _mm512_storeu_si512((__m512i*)ptr, low.val); + _mm512_storeu_si512((__m512i*)(ptr + 32), high.val); + } +} + +inline void v_store_interleave( unsigned* ptr, const v_uint32x16& x, const v_uint32x16& y, + hal::StoreMode mode=hal::STORE_UNALIGNED ) +{ + v_uint32x16 low, high; + v_zip(x, y, low, high); + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm512_stream_si512((__m512i*)ptr, low.val); + _mm512_stream_si512((__m512i*)(ptr + 16), high.val); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm512_store_si512((__m512i*)ptr, low.val); + _mm512_store_si512((__m512i*)(ptr + 16), high.val); + } + else + { + _mm512_storeu_si512((__m512i*)ptr, low.val); + _mm512_storeu_si512((__m512i*)(ptr + 16), high.val); + } +} + +inline void v_store_interleave( uint64* ptr, const v_uint64x8& x, const v_uint64x8& y, + hal::StoreMode mode=hal::STORE_UNALIGNED ) +{ + v_uint64x8 low, high; + v_zip(x, y, low, high); + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm512_stream_si512((__m512i*)ptr, low.val); + _mm512_stream_si512((__m512i*)(ptr + 8), high.val); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm512_store_si512((__m512i*)ptr, low.val); + _mm512_store_si512((__m512i*)(ptr + 8), high.val); + } + else + { + _mm512_storeu_si512((__m512i*)ptr, low.val); + _mm512_storeu_si512((__m512i*)(ptr + 8), high.val); + } +} + +inline void v_store_interleave( uchar* ptr, const v_uint8x64& a, const v_uint8x64& b, const v_uint8x64& c, + hal::StoreMode mode=hal::STORE_UNALIGNED ) +{ +#if CV_AVX_512VBMI + __m512i mask0 = _v512_set_epu8(127, 84, 20, 126, 83, 19, 125, 82, 18, 124, 81, 17, 123, 80, 16, 122, + 79, 15, 121, 78, 14, 120, 77, 13, 119, 76, 12, 118, 75, 11, 117, 74, + 10, 116, 73, 9, 115, 72, 8, 114, 71, 7, 113, 70, 6, 112, 69, 5, + 111, 68, 4, 110, 67, 3, 109, 66, 2, 108, 65, 1, 107, 64, 0, 106); + __m512i mask1 = _v512_set_epu8( 21, 42, 105, 20, 41, 104, 19, 40, 103, 18, 39, 102, 17, 38, 101, 16, + 37, 100, 15, 36, 99, 14, 35, 98, 13, 34, 97, 12, 33, 96, 11, 32, + 95, 10, 31, 94, 9, 30, 93, 8, 29, 92, 7, 28, 91, 6, 27, 90, + 5, 26, 89, 4, 25, 88, 3, 24, 87, 2, 23, 86, 1, 22, 85, 0); + __m512i mask2 = _v512_set_epu8(106, 127, 63, 105, 126, 62, 104, 125, 61, 103, 124, 60, 102, 123, 59, 101, + 122, 58, 100, 121, 57, 99, 120, 56, 98, 119, 55, 97, 118, 54, 96, 117, + 53, 95, 116, 52, 94, 115, 51, 93, 114, 50, 92, 113, 49, 91, 112, 48, + 90, 111, 47, 89, 110, 46, 88, 109, 45, 87, 108, 44, 86, 107, 43, 85); + __m512i r2g0r0 = _mm512_permutex2var_epi8(b.val, mask0, c.val); + __m512i b0r1b1 = _mm512_permutex2var_epi8(a.val, mask1, c.val); + __m512i g1b2g2 = _mm512_permutex2var_epi8(a.val, mask2, b.val); + + __m512i bgr0 = _mm512_mask_blend_epi8(0x9249249249249249, r2g0r0, b0r1b1); + __m512i bgr1 = _mm512_mask_blend_epi8(0x9249249249249249, b0r1b1, g1b2g2); + __m512i bgr2 = _mm512_mask_blend_epi8(0x9249249249249249, g1b2g2, r2g0r0); +#else + __m512i g1g0 = _mm512_shuffle_epi8(b.val, _mm512_set4_epi32(0x0e0f0c0d, 0x0a0b0809, 0x06070405, 0x02030001)); + __m512i b0g0 = _mm512_mask_blend_epi8(0xAAAAAAAAAAAAAAAA, a.val, g1g0); + __m512i r0b1 = _mm512_mask_blend_epi8(0xAAAAAAAAAAAAAAAA, c.val, a.val); + __m512i g1r1 = _mm512_mask_blend_epi8(0xAAAAAAAAAAAAAAAA, g1g0, c.val); + + __m512i mask0 = _v512_set_epu16(42, 10, 31, 41, 9, 30, 40, 8, 29, 39, 7, 28, 38, 6, 27, 37, + 5, 26, 36, 4, 25, 35, 3, 24, 34, 2, 23, 33, 1, 22, 32, 0); + __m512i mask1 = _v512_set_epu16(21, 52, 41, 20, 51, 40, 19, 50, 39, 18, 49, 38, 17, 48, 37, 16, + 47, 36, 15, 46, 35, 14, 45, 34, 13, 44, 33, 12, 43, 32, 11, 42); + __m512i mask2 = _v512_set_epu16(63, 31, 20, 62, 30, 19, 61, 29, 18, 60, 28, 17, 59, 27, 16, 58, + 26, 15, 57, 25, 14, 56, 24, 13, 55, 23, 12, 54, 22, 11, 53, 21); + __m512i b0g0b2 = _mm512_permutex2var_epi16(b0g0, mask0, r0b1); + __m512i r1b1r0 = _mm512_permutex2var_epi16(b0g0, mask1, g1r1); + __m512i g2r2g1 = _mm512_permutex2var_epi16(r0b1, mask2, g1r1); + + __m512i bgr0 = _mm512_mask_blend_epi16(0x24924924, b0g0b2, r1b1r0); + __m512i bgr1 = _mm512_mask_blend_epi16(0x24924924, r1b1r0, g2r2g1); + __m512i bgr2 = _mm512_mask_blend_epi16(0x24924924, g2r2g1, b0g0b2); +#endif + + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm512_stream_si512((__m512i*)ptr, bgr0); + _mm512_stream_si512((__m512i*)(ptr + 64), bgr1); + _mm512_stream_si512((__m512i*)(ptr + 128), bgr2); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm512_store_si512((__m512i*)ptr, bgr0); + _mm512_store_si512((__m512i*)(ptr + 64), bgr1); + _mm512_store_si512((__m512i*)(ptr + 128), bgr2); + } + else + { + _mm512_storeu_si512((__m512i*)ptr, bgr0); + _mm512_storeu_si512((__m512i*)(ptr + 64), bgr1); + _mm512_storeu_si512((__m512i*)(ptr + 128), bgr2); + } +} + +inline void v_store_interleave( ushort* ptr, const v_uint16x32& a, const v_uint16x32& b, const v_uint16x32& c, + hal::StoreMode mode=hal::STORE_UNALIGNED ) +{ + __m512i mask0 = _v512_set_epu16(42, 10, 31, 41, 9, 30, 40, 8, 29, 39, 7, 28, 38, 6, 27, 37, + 5, 26, 36, 4, 25, 35, 3, 24, 34, 2, 23, 33, 1, 22, 32, 0); + __m512i mask1 = _v512_set_epu16(21, 52, 41, 20, 51, 40, 19, 50, 39, 18, 49, 38, 17, 48, 37, 16, + 47, 36, 15, 46, 35, 14, 45, 34, 13, 44, 33, 12, 43, 32, 11, 42); + __m512i mask2 = _v512_set_epu16(63, 31, 20, 62, 30, 19, 61, 29, 18, 60, 28, 17, 59, 27, 16, 58, + 26, 15, 57, 25, 14, 56, 24, 13, 55, 23, 12, 54, 22, 11, 53, 21); + __m512i b0g0b2 = _mm512_permutex2var_epi16(a.val, mask0, b.val); + __m512i r1b1r0 = _mm512_permutex2var_epi16(a.val, mask1, c.val); + __m512i g2r2g1 = _mm512_permutex2var_epi16(b.val, mask2, c.val); + + __m512i bgr0 = _mm512_mask_blend_epi16(0x24924924, b0g0b2, r1b1r0); + __m512i bgr1 = _mm512_mask_blend_epi16(0x24924924, r1b1r0, g2r2g1); + __m512i bgr2 = _mm512_mask_blend_epi16(0x24924924, g2r2g1, b0g0b2); + + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm512_stream_si512((__m512i*)ptr, bgr0); + _mm512_stream_si512((__m512i*)(ptr + 32), bgr1); + _mm512_stream_si512((__m512i*)(ptr + 64), bgr2); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm512_store_si512((__m512i*)ptr, bgr0); + _mm512_store_si512((__m512i*)(ptr + 32), bgr1); + _mm512_store_si512((__m512i*)(ptr + 64), bgr2); + } + else + { + _mm512_storeu_si512((__m512i*)ptr, bgr0); + _mm512_storeu_si512((__m512i*)(ptr + 32), bgr1); + _mm512_storeu_si512((__m512i*)(ptr + 64), bgr2); + } +} + +inline void v_store_interleave( unsigned* ptr, const v_uint32x16& a, const v_uint32x16& b, const v_uint32x16& c, + hal::StoreMode mode=hal::STORE_UNALIGNED ) +{ + __m512i mask0 = _v512_set_epu32(26, 31, 15, 25, 30, 14, 24, 29, 13, 23, 28, 12, 22, 27, 11, 21); + __m512i mask1 = _v512_set_epu32(31, 10, 25, 30, 9, 24, 29, 8, 23, 28, 7, 22, 27, 6, 21, 26); + __m512i g1b2g2 = _mm512_permutex2var_epi32(a.val, mask0, b.val); + __m512i r2r1b1 = _mm512_permutex2var_epi32(a.val, mask1, c.val); + + __m512i bgr0 = _mm512_mask_expand_epi32(_mm512_mask_expand_epi32(_mm512_maskz_expand_epi32(0x9249, a.val), 0x2492, b.val), 0x4924, c.val); + __m512i bgr1 = _mm512_mask_blend_epi32(0x9249, r2r1b1, g1b2g2); + __m512i bgr2 = _mm512_mask_blend_epi32(0x9249, g1b2g2, r2r1b1); + + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm512_stream_si512((__m512i*)ptr, bgr0); + _mm512_stream_si512((__m512i*)(ptr + 16), bgr1); + _mm512_stream_si512((__m512i*)(ptr + 32), bgr2); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm512_store_si512((__m512i*)ptr, bgr0); + _mm512_store_si512((__m512i*)(ptr + 16), bgr1); + _mm512_store_si512((__m512i*)(ptr + 32), bgr2); + } + else + { + _mm512_storeu_si512((__m512i*)ptr, bgr0); + _mm512_storeu_si512((__m512i*)(ptr + 16), bgr1); + _mm512_storeu_si512((__m512i*)(ptr + 32), bgr2); + } +} + +inline void v_store_interleave( uint64* ptr, const v_uint64x8& a, const v_uint64x8& b, const v_uint64x8& c, + hal::StoreMode mode=hal::STORE_UNALIGNED ) +{ + __m512i mask0 = _v512_set_epu64( 5, 12, 7, 4, 11, 6, 3, 10); + __m512i mask1 = _v512_set_epu64(15, 7, 4, 14, 6, 3, 13, 5); + __m512i r1b1b2 = _mm512_permutex2var_epi64(a.val, mask0, c.val); + __m512i g2r2g1 = _mm512_permutex2var_epi64(b.val, mask1, c.val); + + __m512i bgr0 = _mm512_mask_expand_epi64(_mm512_mask_expand_epi64(_mm512_maskz_expand_epi64(0x49, a.val), 0x92, b.val), 0x24, c.val); + __m512i bgr1 = _mm512_mask_blend_epi64(0xdb, g2r2g1, r1b1b2); + __m512i bgr2 = _mm512_mask_blend_epi64(0xdb, r1b1b2, g2r2g1); + + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm512_stream_si512((__m512i*)ptr, bgr0); + _mm512_stream_si512((__m512i*)(ptr + 8), bgr1); + _mm512_stream_si512((__m512i*)(ptr + 16), bgr2); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm512_store_si512((__m512i*)ptr, bgr0); + _mm512_store_si512((__m512i*)(ptr + 8), bgr1); + _mm512_store_si512((__m512i*)(ptr + 16), bgr2); + } + else + { + _mm512_storeu_si512((__m512i*)ptr, bgr0); + _mm512_storeu_si512((__m512i*)(ptr + 8), bgr1); + _mm512_storeu_si512((__m512i*)(ptr + 16), bgr2); + } +} + +inline void v_store_interleave( uchar* ptr, const v_uint8x64& a, const v_uint8x64& b, + const v_uint8x64& c, const v_uint8x64& d, + hal::StoreMode mode=hal::STORE_UNALIGNED ) +{ + v_uint8x64 br01, br23, ga01, ga23; + v_zip(a, c, br01, br23); + v_zip(b, d, ga01, ga23); + v_uint8x64 bgra0, bgra1, bgra2, bgra3; + v_zip(br01, ga01, bgra0, bgra1); + v_zip(br23, ga23, bgra2, bgra3); + + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm512_stream_si512((__m512i*)ptr, bgra0.val); + _mm512_stream_si512((__m512i*)(ptr + 64), bgra1.val); + _mm512_stream_si512((__m512i*)(ptr + 128), bgra2.val); + _mm512_stream_si512((__m512i*)(ptr + 192), bgra3.val); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm512_store_si512((__m512i*)ptr, bgra0.val); + _mm512_store_si512((__m512i*)(ptr + 64), bgra1.val); + _mm512_store_si512((__m512i*)(ptr + 128), bgra2.val); + _mm512_store_si512((__m512i*)(ptr + 192), bgra3.val); + } + else + { + _mm512_storeu_si512((__m512i*)ptr, bgra0.val); + _mm512_storeu_si512((__m512i*)(ptr + 64), bgra1.val); + _mm512_storeu_si512((__m512i*)(ptr + 128), bgra2.val); + _mm512_storeu_si512((__m512i*)(ptr + 192), bgra3.val); + } +} + +inline void v_store_interleave( ushort* ptr, const v_uint16x32& a, const v_uint16x32& b, + const v_uint16x32& c, const v_uint16x32& d, + hal::StoreMode mode=hal::STORE_UNALIGNED ) +{ + v_uint16x32 br01, br23, ga01, ga23; + v_zip(a, c, br01, br23); + v_zip(b, d, ga01, ga23); + v_uint16x32 bgra0, bgra1, bgra2, bgra3; + v_zip(br01, ga01, bgra0, bgra1); + v_zip(br23, ga23, bgra2, bgra3); + + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm512_stream_si512((__m512i*)ptr, bgra0.val); + _mm512_stream_si512((__m512i*)(ptr + 32), bgra1.val); + _mm512_stream_si512((__m512i*)(ptr + 64), bgra2.val); + _mm512_stream_si512((__m512i*)(ptr + 96), bgra3.val); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm512_store_si512((__m512i*)ptr, bgra0.val); + _mm512_store_si512((__m512i*)(ptr + 32), bgra1.val); + _mm512_store_si512((__m512i*)(ptr + 64), bgra2.val); + _mm512_store_si512((__m512i*)(ptr + 96), bgra3.val); + } + else + { + _mm512_storeu_si512((__m512i*)ptr, bgra0.val); + _mm512_storeu_si512((__m512i*)(ptr + 32), bgra1.val); + _mm512_storeu_si512((__m512i*)(ptr + 64), bgra2.val); + _mm512_storeu_si512((__m512i*)(ptr + 96), bgra3.val); + } +} + +inline void v_store_interleave( unsigned* ptr, const v_uint32x16& a, const v_uint32x16& b, + const v_uint32x16& c, const v_uint32x16& d, + hal::StoreMode mode=hal::STORE_UNALIGNED ) +{ + v_uint32x16 br01, br23, ga01, ga23; + v_zip(a, c, br01, br23); + v_zip(b, d, ga01, ga23); + v_uint32x16 bgra0, bgra1, bgra2, bgra3; + v_zip(br01, ga01, bgra0, bgra1); + v_zip(br23, ga23, bgra2, bgra3); + + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm512_stream_si512((__m512i*)ptr, bgra0.val); + _mm512_stream_si512((__m512i*)(ptr + 16), bgra1.val); + _mm512_stream_si512((__m512i*)(ptr + 32), bgra2.val); + _mm512_stream_si512((__m512i*)(ptr + 48), bgra3.val); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm512_store_si512((__m512i*)ptr, bgra0.val); + _mm512_store_si512((__m512i*)(ptr + 16), bgra1.val); + _mm512_store_si512((__m512i*)(ptr + 32), bgra2.val); + _mm512_store_si512((__m512i*)(ptr + 48), bgra3.val); + } + else + { + _mm512_storeu_si512((__m512i*)ptr, bgra0.val); + _mm512_storeu_si512((__m512i*)(ptr + 16), bgra1.val); + _mm512_storeu_si512((__m512i*)(ptr + 32), bgra2.val); + _mm512_storeu_si512((__m512i*)(ptr + 48), bgra3.val); + } +} + +inline void v_store_interleave( uint64* ptr, const v_uint64x8& a, const v_uint64x8& b, + const v_uint64x8& c, const v_uint64x8& d, + hal::StoreMode mode=hal::STORE_UNALIGNED ) +{ + v_uint64x8 br01, br23, ga01, ga23; + v_zip(a, c, br01, br23); + v_zip(b, d, ga01, ga23); + v_uint64x8 bgra0, bgra1, bgra2, bgra3; + v_zip(br01, ga01, bgra0, bgra1); + v_zip(br23, ga23, bgra2, bgra3); + + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm512_stream_si512((__m512i*)ptr, bgra0.val); + _mm512_stream_si512((__m512i*)(ptr + 8), bgra1.val); + _mm512_stream_si512((__m512i*)(ptr + 16), bgra2.val); + _mm512_stream_si512((__m512i*)(ptr + 24), bgra3.val); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm512_store_si512((__m512i*)ptr, bgra0.val); + _mm512_store_si512((__m512i*)(ptr + 8), bgra1.val); + _mm512_store_si512((__m512i*)(ptr + 16), bgra2.val); + _mm512_store_si512((__m512i*)(ptr + 24), bgra3.val); + } + else + { + _mm512_storeu_si512((__m512i*)ptr, bgra0.val); + _mm512_storeu_si512((__m512i*)(ptr + 8), bgra1.val); + _mm512_storeu_si512((__m512i*)(ptr + 16), bgra2.val); + _mm512_storeu_si512((__m512i*)(ptr + 24), bgra3.val); + } +} + +#define OPENCV_HAL_IMPL_AVX512_LOADSTORE_INTERLEAVE(_Tpvec0, _Tp0, suffix0, _Tpvec1, _Tp1, suffix1) \ +inline void v_load_deinterleave( const _Tp0* ptr, _Tpvec0& a0, _Tpvec0& b0 ) \ +{ \ + _Tpvec1 a1, b1; \ + v_load_deinterleave((const _Tp1*)ptr, a1, b1); \ + a0 = v_reinterpret_as_##suffix0(a1); \ + b0 = v_reinterpret_as_##suffix0(b1); \ +} \ +inline void v_load_deinterleave( const _Tp0* ptr, _Tpvec0& a0, _Tpvec0& b0, _Tpvec0& c0 ) \ +{ \ + _Tpvec1 a1, b1, c1; \ + v_load_deinterleave((const _Tp1*)ptr, a1, b1, c1); \ + a0 = v_reinterpret_as_##suffix0(a1); \ + b0 = v_reinterpret_as_##suffix0(b1); \ + c0 = v_reinterpret_as_##suffix0(c1); \ +} \ +inline void v_load_deinterleave( const _Tp0* ptr, _Tpvec0& a0, _Tpvec0& b0, _Tpvec0& c0, _Tpvec0& d0 ) \ +{ \ + _Tpvec1 a1, b1, c1, d1; \ + v_load_deinterleave((const _Tp1*)ptr, a1, b1, c1, d1); \ + a0 = v_reinterpret_as_##suffix0(a1); \ + b0 = v_reinterpret_as_##suffix0(b1); \ + c0 = v_reinterpret_as_##suffix0(c1); \ + d0 = v_reinterpret_as_##suffix0(d1); \ +} \ +inline void v_store_interleave( _Tp0* ptr, const _Tpvec0& a0, const _Tpvec0& b0, \ + hal::StoreMode mode=hal::STORE_UNALIGNED ) \ +{ \ + _Tpvec1 a1 = v_reinterpret_as_##suffix1(a0); \ + _Tpvec1 b1 = v_reinterpret_as_##suffix1(b0); \ + v_store_interleave((_Tp1*)ptr, a1, b1, mode); \ +} \ +inline void v_store_interleave( _Tp0* ptr, const _Tpvec0& a0, const _Tpvec0& b0, const _Tpvec0& c0, \ + hal::StoreMode mode=hal::STORE_UNALIGNED ) \ +{ \ + _Tpvec1 a1 = v_reinterpret_as_##suffix1(a0); \ + _Tpvec1 b1 = v_reinterpret_as_##suffix1(b0); \ + _Tpvec1 c1 = v_reinterpret_as_##suffix1(c0); \ + v_store_interleave((_Tp1*)ptr, a1, b1, c1, mode); \ +} \ +inline void v_store_interleave( _Tp0* ptr, const _Tpvec0& a0, const _Tpvec0& b0, \ + const _Tpvec0& c0, const _Tpvec0& d0, \ + hal::StoreMode mode=hal::STORE_UNALIGNED ) \ +{ \ + _Tpvec1 a1 = v_reinterpret_as_##suffix1(a0); \ + _Tpvec1 b1 = v_reinterpret_as_##suffix1(b0); \ + _Tpvec1 c1 = v_reinterpret_as_##suffix1(c0); \ + _Tpvec1 d1 = v_reinterpret_as_##suffix1(d0); \ + v_store_interleave((_Tp1*)ptr, a1, b1, c1, d1, mode); \ +} + +OPENCV_HAL_IMPL_AVX512_LOADSTORE_INTERLEAVE(v_int8x64, schar, s8, v_uint8x64, uchar, u8) +OPENCV_HAL_IMPL_AVX512_LOADSTORE_INTERLEAVE(v_int16x32, short, s16, v_uint16x32, ushort, u16) +OPENCV_HAL_IMPL_AVX512_LOADSTORE_INTERLEAVE(v_int32x16, int, s32, v_uint32x16, unsigned, u32) +OPENCV_HAL_IMPL_AVX512_LOADSTORE_INTERLEAVE(v_float32x16, float, f32, v_uint32x16, unsigned, u32) +OPENCV_HAL_IMPL_AVX512_LOADSTORE_INTERLEAVE(v_int64x8, int64, s64, v_uint64x8, uint64, u64) +OPENCV_HAL_IMPL_AVX512_LOADSTORE_INTERLEAVE(v_float64x8, double, f64, v_uint64x8, uint64, u64) + +////////// Mask and checks ///////// + +/** Mask **/ +inline int64 v_signmask(const v_int8x64& a) { return (int64)_mm512_movepi8_mask(a.val); } +inline int v_signmask(const v_int16x32& a) { return (int)_mm512_cmp_epi16_mask(a.val, _mm512_setzero_si512(), _MM_CMPINT_LT); } +inline int v_signmask(const v_int32x16& a) { return (int)_mm512_cmp_epi32_mask(a.val, _mm512_setzero_si512(), _MM_CMPINT_LT); } +inline int v_signmask(const v_int64x8& a) { return (int)_mm512_cmp_epi64_mask(a.val, _mm512_setzero_si512(), _MM_CMPINT_LT); } + +inline int64 v_signmask(const v_uint8x64& a) { return v_signmask(v_reinterpret_as_s8(a)); } +inline int v_signmask(const v_uint16x32& a) { return v_signmask(v_reinterpret_as_s16(a)); } +inline int v_signmask(const v_uint32x16& a) { return v_signmask(v_reinterpret_as_s32(a)); } +inline int v_signmask(const v_uint64x8& a) { return v_signmask(v_reinterpret_as_s64(a)); } +inline int v_signmask(const v_float32x16& a) { return v_signmask(v_reinterpret_as_s32(a)); } +inline int v_signmask(const v_float64x8& a) { return v_signmask(v_reinterpret_as_s64(a)); } + +/** Checks **/ +inline bool v_check_all(const v_int8x64& a) { return !(bool)_mm512_cmp_epi8_mask(a.val, _mm512_setzero_si512(), _MM_CMPINT_NLT); } +inline bool v_check_any(const v_int8x64& a) { return (bool)_mm512_movepi8_mask(a.val); } +inline bool v_check_all(const v_int16x32& a) { return !(bool)_mm512_cmp_epi16_mask(a.val, _mm512_setzero_si512(), _MM_CMPINT_NLT); } +inline bool v_check_any(const v_int16x32& a) { return (bool)_mm512_cmp_epi16_mask(a.val, _mm512_setzero_si512(), _MM_CMPINT_LT); } +inline bool v_check_all(const v_int32x16& a) { return !(bool)_mm512_cmp_epi32_mask(a.val, _mm512_setzero_si512(), _MM_CMPINT_NLT); } +inline bool v_check_any(const v_int32x16& a) { return (bool)_mm512_cmp_epi32_mask(a.val, _mm512_setzero_si512(), _MM_CMPINT_LT); } +inline bool v_check_all(const v_int64x8& a) { return !(bool)_mm512_cmp_epi64_mask(a.val, _mm512_setzero_si512(), _MM_CMPINT_NLT); } +inline bool v_check_any(const v_int64x8& a) { return (bool)_mm512_cmp_epi64_mask(a.val, _mm512_setzero_si512(), _MM_CMPINT_LT); } + +inline bool v_check_all(const v_float32x16& a) { return v_check_all(v_reinterpret_as_s32(a)); } +inline bool v_check_any(const v_float32x16& a) { return v_check_any(v_reinterpret_as_s32(a)); } +inline bool v_check_all(const v_float64x8& a) { return v_check_all(v_reinterpret_as_s64(a)); } +inline bool v_check_any(const v_float64x8& a) { return v_check_any(v_reinterpret_as_s64(a)); } +inline bool v_check_all(const v_uint8x64& a) { return v_check_all(v_reinterpret_as_s8(a)); } +inline bool v_check_all(const v_uint16x32& a) { return v_check_all(v_reinterpret_as_s16(a)); } +inline bool v_check_all(const v_uint32x16& a) { return v_check_all(v_reinterpret_as_s32(a)); } +inline bool v_check_all(const v_uint64x8& a) { return v_check_all(v_reinterpret_as_s64(a)); } +inline bool v_check_any(const v_uint8x64& a) { return v_check_any(v_reinterpret_as_s8(a)); } +inline bool v_check_any(const v_uint16x32& a) { return v_check_any(v_reinterpret_as_s16(a)); } +inline bool v_check_any(const v_uint32x16& a) { return v_check_any(v_reinterpret_as_s32(a)); } +inline bool v_check_any(const v_uint64x8& a) { return v_check_any(v_reinterpret_as_s64(a)); } + +inline int v_scan_forward(const v_int8x64& a) +{ + int64 mask = _mm512_movepi8_mask(a.val); + int mask32 = (int)mask; + return mask != 0 ? mask32 != 0 ? trailingZeros32(mask32) : 32 + trailingZeros32((int)(mask >> 32)) : 0; +} +inline int v_scan_forward(const v_uint8x64& a) { return v_scan_forward(v_reinterpret_as_s8(a)); } +inline int v_scan_forward(const v_int16x32& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s16(a))); } +inline int v_scan_forward(const v_uint16x32& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s16(a))); } +inline int v_scan_forward(const v_int32x16& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s16(a))) / 2; } +inline int v_scan_forward(const v_uint32x16& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s16(a))) / 2; } +inline int v_scan_forward(const v_float32x16& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s16(a))) / 2; } +inline int v_scan_forward(const v_int64x8& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s16(a))) / 4; } +inline int v_scan_forward(const v_uint64x8& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s16(a))) / 4; } +inline int v_scan_forward(const v_float64x8& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s16(a))) / 4; } + +inline void v512_cleanup() { _mm256_zeroall(); } + +CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END + +//! @endcond + +} // cv:: + +#endif // OPENCV_HAL_INTRIN_AVX_HPP diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/hal/intrin_cpp.hpp b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/hal/intrin_cpp.hpp new file mode 100644 index 0000000..859bfd7 --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/hal/intrin_cpp.hpp @@ -0,0 +1,2781 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Copyright (C) 2013, OpenCV Foundation, all rights reserved. +// Copyright (C) 2015, Itseez Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef OPENCV_HAL_INTRIN_CPP_HPP +#define OPENCV_HAL_INTRIN_CPP_HPP + +#include +#include +#include +#include "opencv2/core/saturate.hpp" + +//! @cond IGNORED +#define CV_SIMD128_CPP 1 +#if defined(CV_FORCE_SIMD128_CPP) || defined(CV_DOXYGEN) +#define CV_SIMD128 1 +#define CV_SIMD128_64F 1 +#endif +//! @endcond + +namespace cv +{ + +#ifndef CV_DOXYGEN +CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN +#endif + +/** @addtogroup core_hal_intrin + +"Universal intrinsics" is a types and functions set intended to simplify vectorization of code on +different platforms. Currently there are two supported SIMD extensions: __SSE/SSE2__ on x86 +architectures and __NEON__ on ARM architectures, both allow working with 128 bit registers +containing packed values of different types. In case when there is no SIMD extension available +during compilation, fallback C++ implementation of intrinsics will be chosen and code will work as +expected although it could be slower. + +### Types + +There are several types representing 128-bit register as a vector of packed values, each type is +implemented as a structure based on a one SIMD register. + +- cv::v_uint8x16 and cv::v_int8x16: sixteen 8-bit integer values (unsigned/signed) - char +- cv::v_uint16x8 and cv::v_int16x8: eight 16-bit integer values (unsigned/signed) - short +- cv::v_uint32x4 and cv::v_int32x4: four 32-bit integer values (unsigned/signed) - int +- cv::v_uint64x2 and cv::v_int64x2: two 64-bit integer values (unsigned/signed) - int64 +- cv::v_float32x4: four 32-bit floating point values (signed) - float +- cv::v_float64x2: two 64-bit floating point values (signed) - double + +@note +cv::v_float64x2 is not implemented in NEON variant, if you want to use this type, don't forget to +check the CV_SIMD128_64F preprocessor definition: +@code +#if CV_SIMD128_64F +//... +#endif +@endcode + +### Load and store operations + +These operations allow to set contents of the register explicitly or by loading it from some memory +block and to save contents of the register to memory block. + +- Constructors: +@ref v_reg::v_reg(const _Tp *ptr) "from memory", +@ref v_reg::v_reg(_Tp s0, _Tp s1) "from two values", ... +- Other create methods: +@ref v_setall_s8, @ref v_setall_u8, ..., +@ref v_setzero_u8, @ref v_setzero_s8, ... +- Memory operations: +@ref v_load, @ref v_load_aligned, @ref v_load_low, @ref v_load_halves, +@ref v_store, @ref v_store_aligned, +@ref v_store_high, @ref v_store_low + +### Value reordering + +These operations allow to reorder or recombine elements in one or multiple vectors. + +- Interleave, deinterleave (2, 3 and 4 channels): @ref v_load_deinterleave, @ref v_store_interleave +- Expand: @ref v_load_expand, @ref v_load_expand_q, @ref v_expand, @ref v_expand_low, @ref v_expand_high +- Pack: @ref v_pack, @ref v_pack_u, @ref v_pack_b, @ref v_rshr_pack, @ref v_rshr_pack_u, +@ref v_pack_store, @ref v_pack_u_store, @ref v_rshr_pack_store, @ref v_rshr_pack_u_store +- Recombine: @ref v_zip, @ref v_recombine, @ref v_combine_low, @ref v_combine_high +- Reverse: @ref v_reverse +- Extract: @ref v_extract + + +### Arithmetic, bitwise and comparison operations + +Element-wise binary and unary operations. + +- Arithmetics: +@ref operator +(const v_reg &a, const v_reg &b) "+", +@ref operator -(const v_reg &a, const v_reg &b) "-", +@ref operator *(const v_reg &a, const v_reg &b) "*", +@ref operator /(const v_reg &a, const v_reg &b) "/", +@ref v_mul_expand + +- Non-saturating arithmetics: @ref v_add_wrap, @ref v_sub_wrap + +- Bitwise shifts: +@ref operator <<(const v_reg &a, int s) "<<", +@ref operator >>(const v_reg &a, int s) ">>", +@ref v_shl, @ref v_shr + +- Bitwise logic: +@ref operator &(const v_reg &a, const v_reg &b) "&", +@ref operator |(const v_reg &a, const v_reg &b) "|", +@ref operator ^(const v_reg &a, const v_reg &b) "^", +@ref operator ~(const v_reg &a) "~" + +- Comparison: +@ref operator >(const v_reg &a, const v_reg &b) ">", +@ref operator >=(const v_reg &a, const v_reg &b) ">=", +@ref operator <(const v_reg &a, const v_reg &b) "<", +@ref operator <=(const v_reg &a, const v_reg &b) "<=", +@ref operator==(const v_reg &a, const v_reg &b) "==", +@ref operator !=(const v_reg &a, const v_reg &b) "!=" + +- min/max: @ref v_min, @ref v_max + +### Reduce and mask + +Most of these operations return only one value. + +- Reduce: @ref v_reduce_min, @ref v_reduce_max, @ref v_reduce_sum, @ref v_popcount +- Mask: @ref v_signmask, @ref v_check_all, @ref v_check_any, @ref v_select + +### Other math + +- Some frequent operations: @ref v_sqrt, @ref v_invsqrt, @ref v_magnitude, @ref v_sqr_magnitude +- Absolute values: @ref v_abs, @ref v_absdiff, @ref v_absdiffs + +### Conversions + +Different type conversions and casts: + +- Rounding: @ref v_round, @ref v_floor, @ref v_ceil, @ref v_trunc, +- To float: @ref v_cvt_f32, @ref v_cvt_f64 +- Reinterpret: @ref v_reinterpret_as_u8, @ref v_reinterpret_as_s8, ... + +### Matrix operations + +In these operations vectors represent matrix rows/columns: @ref v_dotprod, @ref v_dotprod_fast, +@ref v_dotprod_expand, @ref v_dotprod_expand_fast, @ref v_matmul, @ref v_transpose4x4 + +### Usability + +Most operations are implemented only for some subset of the available types, following matrices +shows the applicability of different operations to the types. + +Regular integers: + +| Operations\\Types | uint 8x16 | int 8x16 | uint 16x8 | int 16x8 | uint 32x4 | int 32x4 | +|-------------------|:-:|:-:|:-:|:-:|:-:|:-:| +|load, store | x | x | x | x | x | x | +|interleave | x | x | x | x | x | x | +|expand | x | x | x | x | x | x | +|expand_low | x | x | x | x | x | x | +|expand_high | x | x | x | x | x | x | +|expand_q | x | x | | | | | +|add, sub | x | x | x | x | x | x | +|add_wrap, sub_wrap | x | x | x | x | | | +|mul_wrap | x | x | x | x | | | +|mul | x | x | x | x | x | x | +|mul_expand | x | x | x | x | x | | +|compare | x | x | x | x | x | x | +|shift | | | x | x | x | x | +|dotprod | | | | x | | x | +|dotprod_fast | | | | x | | x | +|dotprod_expand | x | x | x | x | | x | +|dotprod_expand_fast| x | x | x | x | | x | +|logical | x | x | x | x | x | x | +|min, max | x | x | x | x | x | x | +|absdiff | x | x | x | x | x | x | +|absdiffs | | x | | x | | | +|reduce | x | x | x | x | x | x | +|mask | x | x | x | x | x | x | +|pack | x | x | x | x | x | x | +|pack_u | x | | x | | | | +|pack_b | x | | | | | | +|unpack | x | x | x | x | x | x | +|extract | x | x | x | x | x | x | +|rotate (lanes) | x | x | x | x | x | x | +|cvt_flt32 | | | | | | x | +|cvt_flt64 | | | | | | x | +|transpose4x4 | | | | | x | x | +|reverse | x | x | x | x | x | x | +|extract_n | x | x | x | x | x | x | +|broadcast_element | | | | | x | x | + +Big integers: + +| Operations\\Types | uint 64x2 | int 64x2 | +|-------------------|:-:|:-:| +|load, store | x | x | +|add, sub | x | x | +|shift | x | x | +|logical | x | x | +|reverse | x | x | +|extract | x | x | +|rotate (lanes) | x | x | +|cvt_flt64 | | x | +|extract_n | x | x | + +Floating point: + +| Operations\\Types | float 32x4 | float 64x2 | +|-------------------|:-:|:-:| +|load, store | x | x | +|interleave | x | | +|add, sub | x | x | +|mul | x | x | +|div | x | x | +|compare | x | x | +|min, max | x | x | +|absdiff | x | x | +|reduce | x | | +|mask | x | x | +|unpack | x | x | +|cvt_flt32 | | x | +|cvt_flt64 | x | | +|sqrt, abs | x | x | +|float math | x | x | +|transpose4x4 | x | | +|extract | x | x | +|rotate (lanes) | x | x | +|reverse | x | x | +|extract_n | x | x | +|broadcast_element | x | | + + @{ */ + +template struct v_reg +{ +//! @cond IGNORED + typedef _Tp lane_type; + enum { nlanes = n }; +// !@endcond + + /** @brief Constructor + + Initializes register with data from memory + @param ptr pointer to memory block with data for register */ + explicit v_reg(const _Tp* ptr) { for( int i = 0; i < n; i++ ) s[i] = ptr[i]; } + + /** @brief Constructor + + Initializes register with two 64-bit values */ + v_reg(_Tp s0, _Tp s1) { s[0] = s0; s[1] = s1; } + + /** @brief Constructor + + Initializes register with four 32-bit values */ + v_reg(_Tp s0, _Tp s1, _Tp s2, _Tp s3) { s[0] = s0; s[1] = s1; s[2] = s2; s[3] = s3; } + + /** @brief Constructor + + Initializes register with eight 16-bit values */ + v_reg(_Tp s0, _Tp s1, _Tp s2, _Tp s3, + _Tp s4, _Tp s5, _Tp s6, _Tp s7) + { + s[0] = s0; s[1] = s1; s[2] = s2; s[3] = s3; + s[4] = s4; s[5] = s5; s[6] = s6; s[7] = s7; + } + + /** @brief Constructor + + Initializes register with sixteen 8-bit values */ + v_reg(_Tp s0, _Tp s1, _Tp s2, _Tp s3, + _Tp s4, _Tp s5, _Tp s6, _Tp s7, + _Tp s8, _Tp s9, _Tp s10, _Tp s11, + _Tp s12, _Tp s13, _Tp s14, _Tp s15) + { + s[0] = s0; s[1] = s1; s[2] = s2; s[3] = s3; + s[4] = s4; s[5] = s5; s[6] = s6; s[7] = s7; + s[8] = s8; s[9] = s9; s[10] = s10; s[11] = s11; + s[12] = s12; s[13] = s13; s[14] = s14; s[15] = s15; + } + + /** @brief Default constructor + + Does not initialize anything*/ + v_reg() {} + + /** @brief Copy constructor */ + v_reg(const v_reg<_Tp, n> & r) + { + for( int i = 0; i < n; i++ ) + s[i] = r.s[i]; + } + /** @brief Access first value + + Returns value of the first lane according to register type, for example: + @code{.cpp} + v_int32x4 r(1, 2, 3, 4); + int v = r.get0(); // returns 1 + v_uint64x2 r(1, 2); + uint64_t v = r.get0(); // returns 1 + @endcode + */ + _Tp get0() const { return s[0]; } + +//! @cond IGNORED + _Tp get(const int i) const { return s[i]; } + v_reg<_Tp, n> high() const + { + v_reg<_Tp, n> c; + int i; + for( i = 0; i < n/2; i++ ) + { + c.s[i] = s[i+(n/2)]; + c.s[i+(n/2)] = 0; + } + return c; + } + + static v_reg<_Tp, n> zero() + { + v_reg<_Tp, n> c; + for( int i = 0; i < n; i++ ) + c.s[i] = (_Tp)0; + return c; + } + + static v_reg<_Tp, n> all(_Tp s) + { + v_reg<_Tp, n> c; + for( int i = 0; i < n; i++ ) + c.s[i] = s; + return c; + } + + template v_reg<_Tp2, n2> reinterpret_as() const + { + size_t bytes = std::min(sizeof(_Tp2)*n2, sizeof(_Tp)*n); + v_reg<_Tp2, n2> c; + std::memcpy(&c.s[0], &s[0], bytes); + return c; + } + + v_reg& operator=(const v_reg<_Tp, n> & r) + { + for( int i = 0; i < n; i++ ) + s[i] = r.s[i]; + return *this; + } + + _Tp s[n]; +//! @endcond +}; + +/** @brief Sixteen 8-bit unsigned integer values */ +typedef v_reg v_uint8x16; +/** @brief Sixteen 8-bit signed integer values */ +typedef v_reg v_int8x16; +/** @brief Eight 16-bit unsigned integer values */ +typedef v_reg v_uint16x8; +/** @brief Eight 16-bit signed integer values */ +typedef v_reg v_int16x8; +/** @brief Four 32-bit unsigned integer values */ +typedef v_reg v_uint32x4; +/** @brief Four 32-bit signed integer values */ +typedef v_reg v_int32x4; +/** @brief Four 32-bit floating point values (single precision) */ +typedef v_reg v_float32x4; +/** @brief Two 64-bit floating point values (double precision) */ +typedef v_reg v_float64x2; +/** @brief Two 64-bit unsigned integer values */ +typedef v_reg v_uint64x2; +/** @brief Two 64-bit signed integer values */ +typedef v_reg v_int64x2; + +/** @brief Add values + +For all types. */ +template CV_INLINE v_reg<_Tp, n> operator+(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b); +template CV_INLINE v_reg<_Tp, n>& operator+=(v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b); + +/** @brief Subtract values + +For all types. */ +template CV_INLINE v_reg<_Tp, n> operator-(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b); +template CV_INLINE v_reg<_Tp, n>& operator-=(v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b); + +/** @brief Multiply values + +For 16- and 32-bit integer types and floating types. */ +template CV_INLINE v_reg<_Tp, n> operator*(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b); +template CV_INLINE v_reg<_Tp, n>& operator*=(v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b); + +/** @brief Divide values + +For floating types only. */ +template CV_INLINE v_reg<_Tp, n> operator/(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b); +template CV_INLINE v_reg<_Tp, n>& operator/=(v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b); + + +/** @brief Bitwise AND + +Only for integer types. */ +template CV_INLINE v_reg<_Tp, n> operator&(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b); +template CV_INLINE v_reg<_Tp, n>& operator&=(v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b); + +/** @brief Bitwise OR + +Only for integer types. */ +template CV_INLINE v_reg<_Tp, n> operator|(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b); +template CV_INLINE v_reg<_Tp, n>& operator|=(v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b); + +/** @brief Bitwise XOR + +Only for integer types.*/ +template CV_INLINE v_reg<_Tp, n> operator^(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b); +template CV_INLINE v_reg<_Tp, n>& operator^=(v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b); + +/** @brief Bitwise NOT + +Only for integer types.*/ +template CV_INLINE v_reg<_Tp, n> operator~(const v_reg<_Tp, n>& a); + + +#ifndef CV_DOXYGEN + +#define CV__HAL_INTRIN_EXPAND_WITH_INTEGER_TYPES(macro_name, ...) \ +__CV_EXPAND(macro_name(uchar, __VA_ARGS__)) \ +__CV_EXPAND(macro_name(schar, __VA_ARGS__)) \ +__CV_EXPAND(macro_name(ushort, __VA_ARGS__)) \ +__CV_EXPAND(macro_name(short, __VA_ARGS__)) \ +__CV_EXPAND(macro_name(unsigned, __VA_ARGS__)) \ +__CV_EXPAND(macro_name(int, __VA_ARGS__)) \ +__CV_EXPAND(macro_name(uint64, __VA_ARGS__)) \ +__CV_EXPAND(macro_name(int64, __VA_ARGS__)) \ + +#define CV__HAL_INTRIN_EXPAND_WITH_FP_TYPES(macro_name, ...) \ +__CV_EXPAND(macro_name(float, __VA_ARGS__)) \ +__CV_EXPAND(macro_name(double, __VA_ARGS__)) \ + +#define CV__HAL_INTRIN_EXPAND_WITH_ALL_TYPES(macro_name, ...) \ +CV__HAL_INTRIN_EXPAND_WITH_INTEGER_TYPES(macro_name, __VA_ARGS__) \ +CV__HAL_INTRIN_EXPAND_WITH_FP_TYPES(macro_name, __VA_ARGS__) \ + +#define CV__HAL_INTRIN_IMPL_BIN_OP_(_Tp, bin_op) \ +template inline \ +v_reg<_Tp, n> operator bin_op (const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \ +{ \ + v_reg<_Tp, n> c; \ + for( int i = 0; i < n; i++ ) \ + c.s[i] = saturate_cast<_Tp>(a.s[i] bin_op b.s[i]); \ + return c; \ +} \ +template inline \ +v_reg<_Tp, n>& operator bin_op##= (v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \ +{ \ + for( int i = 0; i < n; i++ ) \ + a.s[i] = saturate_cast<_Tp>(a.s[i] bin_op b.s[i]); \ + return a; \ +} + +#define CV__HAL_INTRIN_IMPL_BIN_OP(bin_op) CV__HAL_INTRIN_EXPAND_WITH_ALL_TYPES(CV__HAL_INTRIN_IMPL_BIN_OP_, bin_op) + +CV__HAL_INTRIN_IMPL_BIN_OP(+) +CV__HAL_INTRIN_IMPL_BIN_OP(-) +CV__HAL_INTRIN_IMPL_BIN_OP(*) +CV__HAL_INTRIN_EXPAND_WITH_FP_TYPES(CV__HAL_INTRIN_IMPL_BIN_OP_, /) + +#define CV__HAL_INTRIN_IMPL_BIT_OP_(_Tp, bit_op) \ +template CV_INLINE \ +v_reg<_Tp, n> operator bit_op (const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \ +{ \ + v_reg<_Tp, n> c; \ + typedef typename V_TypeTraits<_Tp>::int_type itype; \ + for( int i = 0; i < n; i++ ) \ + c.s[i] = V_TypeTraits<_Tp>::reinterpret_from_int((itype)(V_TypeTraits<_Tp>::reinterpret_int(a.s[i]) bit_op \ + V_TypeTraits<_Tp>::reinterpret_int(b.s[i]))); \ + return c; \ +} \ +template CV_INLINE \ +v_reg<_Tp, n>& operator bit_op##= (v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \ +{ \ + typedef typename V_TypeTraits<_Tp>::int_type itype; \ + for( int i = 0; i < n; i++ ) \ + a.s[i] = V_TypeTraits<_Tp>::reinterpret_from_int((itype)(V_TypeTraits<_Tp>::reinterpret_int(a.s[i]) bit_op \ + V_TypeTraits<_Tp>::reinterpret_int(b.s[i]))); \ + return a; \ +} + +#define CV__HAL_INTRIN_IMPL_BIT_OP(bit_op) \ +CV__HAL_INTRIN_EXPAND_WITH_INTEGER_TYPES(CV__HAL_INTRIN_IMPL_BIT_OP_, bit_op) \ +CV__HAL_INTRIN_EXPAND_WITH_FP_TYPES(CV__HAL_INTRIN_IMPL_BIT_OP_, bit_op) /* TODO: FIXIT remove this after masks refactoring */ + + +CV__HAL_INTRIN_IMPL_BIT_OP(&) +CV__HAL_INTRIN_IMPL_BIT_OP(|) +CV__HAL_INTRIN_IMPL_BIT_OP(^) + +#define CV__HAL_INTRIN_IMPL_BITWISE_NOT_(_Tp, dummy) \ +template CV_INLINE \ +v_reg<_Tp, n> operator ~ (const v_reg<_Tp, n>& a) \ +{ \ + v_reg<_Tp, n> c; \ + for( int i = 0; i < n; i++ ) \ + c.s[i] = V_TypeTraits<_Tp>::reinterpret_from_int(~V_TypeTraits<_Tp>::reinterpret_int(a.s[i])); \ + return c; \ +} \ + +CV__HAL_INTRIN_EXPAND_WITH_INTEGER_TYPES(CV__HAL_INTRIN_IMPL_BITWISE_NOT_, ~) + +#endif // !CV_DOXYGEN + + +//! @brief Helper macro +//! @ingroup core_hal_intrin_impl +#define OPENCV_HAL_IMPL_MATH_FUNC(func, cfunc, _Tp2) \ +template inline v_reg<_Tp2, n> func(const v_reg<_Tp, n>& a) \ +{ \ + v_reg<_Tp2, n> c; \ + for( int i = 0; i < n; i++ ) \ + c.s[i] = cfunc(a.s[i]); \ + return c; \ +} + +//! @brief Helper macro +//! @ingroup core_hal_intrin_impl +#define OPENCV_HAL_IMPL_MATH_FUNC_FLOAT(func, cfunc) \ +inline v_reg func(const v_reg& a) \ +{ \ + v_reg c; \ + for( int i = 0; i < 4; i++ ) \ + c.s[i] = cfunc(a.s[i]); \ + return c; \ +} \ +inline v_reg func(const v_reg& a) \ +{ \ + v_reg c; \ + for( int i = 0; i < 2; i++ ) \ + { \ + c.s[i] = cfunc(a.s[i]); \ + c.s[i + 2] = 0; \ + } \ + return c; \ +} + +/** @brief Square root of elements + +Only for floating point types.*/ +OPENCV_HAL_IMPL_MATH_FUNC(v_sqrt, std::sqrt, _Tp) + +//! @cond IGNORED +OPENCV_HAL_IMPL_MATH_FUNC(v_sin, std::sin, _Tp) +OPENCV_HAL_IMPL_MATH_FUNC(v_cos, std::cos, _Tp) +OPENCV_HAL_IMPL_MATH_FUNC(v_exp, std::exp, _Tp) +OPENCV_HAL_IMPL_MATH_FUNC(v_log, std::log, _Tp) +//! @endcond + +/** @brief Absolute value of elements + +Only for floating point types.*/ +OPENCV_HAL_IMPL_MATH_FUNC(v_abs, (typename V_TypeTraits<_Tp>::abs_type)std::abs, + typename V_TypeTraits<_Tp>::abs_type) + +/** @brief Round elements + +Only for floating point types.*/ +OPENCV_HAL_IMPL_MATH_FUNC_FLOAT(v_round, cvRound) + +/** @brief Floor elements + +Only for floating point types.*/ +OPENCV_HAL_IMPL_MATH_FUNC_FLOAT(v_floor, cvFloor) + +/** @brief Ceil elements + +Only for floating point types.*/ +OPENCV_HAL_IMPL_MATH_FUNC_FLOAT(v_ceil, cvCeil) + +/** @brief Truncate elements + +Only for floating point types.*/ +OPENCV_HAL_IMPL_MATH_FUNC_FLOAT(v_trunc, int) + +//! @brief Helper macro +//! @ingroup core_hal_intrin_impl +#define OPENCV_HAL_IMPL_MINMAX_FUNC(func, cfunc) \ +template inline v_reg<_Tp, n> func(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \ +{ \ + v_reg<_Tp, n> c; \ + for( int i = 0; i < n; i++ ) \ + c.s[i] = cfunc(a.s[i], b.s[i]); \ + return c; \ +} + +//! @brief Helper macro +//! @ingroup core_hal_intrin_impl +#define OPENCV_HAL_IMPL_REDUCE_MINMAX_FUNC(func, cfunc) \ +template inline _Tp func(const v_reg<_Tp, n>& a) \ +{ \ + _Tp c = a.s[0]; \ + for( int i = 1; i < n; i++ ) \ + c = cfunc(c, a.s[i]); \ + return c; \ +} + +/** @brief Choose min values for each pair + +Scheme: +@code +{A1 A2 ...} +{B1 B2 ...} +-------------- +{min(A1,B1) min(A2,B2) ...} +@endcode +For all types except 64-bit integer. */ +OPENCV_HAL_IMPL_MINMAX_FUNC(v_min, std::min) + +/** @brief Choose max values for each pair + +Scheme: +@code +{A1 A2 ...} +{B1 B2 ...} +-------------- +{max(A1,B1) max(A2,B2) ...} +@endcode +For all types except 64-bit integer. */ +OPENCV_HAL_IMPL_MINMAX_FUNC(v_max, std::max) + +/** @brief Find one min value + +Scheme: +@code +{A1 A2 A3 ...} => min(A1,A2,A3,...) +@endcode +For all types except 64-bit integer and 64-bit floating point types. */ +OPENCV_HAL_IMPL_REDUCE_MINMAX_FUNC(v_reduce_min, std::min) + +/** @brief Find one max value + +Scheme: +@code +{A1 A2 A3 ...} => max(A1,A2,A3,...) +@endcode +For all types except 64-bit integer and 64-bit floating point types. */ +OPENCV_HAL_IMPL_REDUCE_MINMAX_FUNC(v_reduce_max, std::max) + +static const unsigned char popCountTable[] = +{ + 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8, +}; +/** @brief Count the 1 bits in the vector lanes and return result as corresponding unsigned type + +Scheme: +@code +{A1 A2 A3 ...} => {popcount(A1), popcount(A2), popcount(A3), ...} +@endcode +For all integer types. */ +template +inline v_reg::abs_type, n> v_popcount(const v_reg<_Tp, n>& a) +{ + v_reg::abs_type, n> b = v_reg::abs_type, n>::zero(); + for (int i = 0; i < n*(int)sizeof(_Tp); i++) + b.s[i/sizeof(_Tp)] += popCountTable[v_reinterpret_as_u8(a).s[i]]; + return b; +} + + +//! @cond IGNORED +template +inline void v_minmax( const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b, + v_reg<_Tp, n>& minval, v_reg<_Tp, n>& maxval ) +{ + for( int i = 0; i < n; i++ ) + { + minval.s[i] = std::min(a.s[i], b.s[i]); + maxval.s[i] = std::max(a.s[i], b.s[i]); + } +} +//! @endcond + +//! @brief Helper macro +//! @ingroup core_hal_intrin_impl +#define OPENCV_HAL_IMPL_CMP_OP(cmp_op) \ +template \ +inline v_reg<_Tp, n> operator cmp_op(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \ +{ \ + typedef typename V_TypeTraits<_Tp>::int_type itype; \ + v_reg<_Tp, n> c; \ + for( int i = 0; i < n; i++ ) \ + c.s[i] = V_TypeTraits<_Tp>::reinterpret_from_int((itype)-(int)(a.s[i] cmp_op b.s[i])); \ + return c; \ +} + +/** @brief Less-than comparison + +For all types except 64-bit integer values. */ +OPENCV_HAL_IMPL_CMP_OP(<) + +/** @brief Greater-than comparison + +For all types except 64-bit integer values. */ +OPENCV_HAL_IMPL_CMP_OP(>) + +/** @brief Less-than or equal comparison + +For all types except 64-bit integer values. */ +OPENCV_HAL_IMPL_CMP_OP(<=) + +/** @brief Greater-than or equal comparison + +For all types except 64-bit integer values. */ +OPENCV_HAL_IMPL_CMP_OP(>=) + +/** @brief Equal comparison + +For all types except 64-bit integer values. */ +OPENCV_HAL_IMPL_CMP_OP(==) + +/** @brief Not equal comparison + +For all types except 64-bit integer values. */ +OPENCV_HAL_IMPL_CMP_OP(!=) + +template +inline v_reg v_not_nan(const v_reg& a) +{ + typedef typename V_TypeTraits::int_type itype; + v_reg c; + for (int i = 0; i < n; i++) + c.s[i] = V_TypeTraits::reinterpret_from_int((itype)-(int)(a.s[i] == a.s[i])); + return c; +} +template +inline v_reg v_not_nan(const v_reg& a) +{ + typedef typename V_TypeTraits::int_type itype; + v_reg c; + for (int i = 0; i < n; i++) + c.s[i] = V_TypeTraits::reinterpret_from_int((itype)-(int)(a.s[i] == a.s[i])); + return c; +} + +//! @brief Helper macro +//! @ingroup core_hal_intrin_impl +#define OPENCV_HAL_IMPL_ARITHM_OP(func, bin_op, cast_op, _Tp2) \ +template \ +inline v_reg<_Tp2, n> func(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \ +{ \ + typedef _Tp2 rtype; \ + v_reg c; \ + for( int i = 0; i < n; i++ ) \ + c.s[i] = cast_op(a.s[i] bin_op b.s[i]); \ + return c; \ +} + +/** @brief Add values without saturation + +For 8- and 16-bit integer values. */ +OPENCV_HAL_IMPL_ARITHM_OP(v_add_wrap, +, (_Tp), _Tp) + +/** @brief Subtract values without saturation + +For 8- and 16-bit integer values. */ +OPENCV_HAL_IMPL_ARITHM_OP(v_sub_wrap, -, (_Tp), _Tp) + +/** @brief Multiply values without saturation + +For 8- and 16-bit integer values. */ +OPENCV_HAL_IMPL_ARITHM_OP(v_mul_wrap, *, (_Tp), _Tp) + +//! @cond IGNORED +template inline T _absdiff(T a, T b) +{ + return a > b ? a - b : b - a; +} +//! @endcond + +/** @brief Absolute difference + +Returns \f$ |a - b| \f$ converted to corresponding unsigned type. +Example: +@code{.cpp} +v_int32x4 a, b; // {1, 2, 3, 4} and {4, 3, 2, 1} +v_uint32x4 c = v_absdiff(a, b); // result is {3, 1, 1, 3} +@endcode +For 8-, 16-, 32-bit integer source types. */ +template +inline v_reg::abs_type, n> v_absdiff(const v_reg<_Tp, n>& a, const v_reg<_Tp, n> & b) +{ + typedef typename V_TypeTraits<_Tp>::abs_type rtype; + v_reg c; + const rtype mask = (rtype)(std::numeric_limits<_Tp>::is_signed ? (1 << (sizeof(rtype)*8 - 1)) : 0); + for( int i = 0; i < n; i++ ) + { + rtype ua = a.s[i] ^ mask; + rtype ub = b.s[i] ^ mask; + c.s[i] = _absdiff(ua, ub); + } + return c; +} + +/** @overload + +For 32-bit floating point values */ +inline v_float32x4 v_absdiff(const v_float32x4& a, const v_float32x4& b) +{ + v_float32x4 c; + for( int i = 0; i < c.nlanes; i++ ) + c.s[i] = _absdiff(a.s[i], b.s[i]); + return c; +} + +/** @overload + +For 64-bit floating point values */ +inline v_float64x2 v_absdiff(const v_float64x2& a, const v_float64x2& b) +{ + v_float64x2 c; + for( int i = 0; i < c.nlanes; i++ ) + c.s[i] = _absdiff(a.s[i], b.s[i]); + return c; +} + +/** @brief Saturating absolute difference + +Returns \f$ saturate(|a - b|) \f$ . +For 8-, 16-bit signed integer source types. */ +template +inline v_reg<_Tp, n> v_absdiffs(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) +{ + v_reg<_Tp, n> c; + for( int i = 0; i < n; i++) + c.s[i] = saturate_cast<_Tp>(std::abs(a.s[i] - b.s[i])); + return c; +} + +/** @brief Inversed square root + +Returns \f$ 1/sqrt(a) \f$ +For floating point types only. */ +template +inline v_reg<_Tp, n> v_invsqrt(const v_reg<_Tp, n>& a) +{ + v_reg<_Tp, n> c; + for( int i = 0; i < n; i++ ) + c.s[i] = 1.f/std::sqrt(a.s[i]); + return c; +} + +/** @brief Magnitude + +Returns \f$ sqrt(a^2 + b^2) \f$ +For floating point types only. */ +template +inline v_reg<_Tp, n> v_magnitude(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) +{ + v_reg<_Tp, n> c; + for( int i = 0; i < n; i++ ) + c.s[i] = std::sqrt(a.s[i]*a.s[i] + b.s[i]*b.s[i]); + return c; +} + +/** @brief Square of the magnitude + +Returns \f$ a^2 + b^2 \f$ +For floating point types only. */ +template +inline v_reg<_Tp, n> v_sqr_magnitude(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) +{ + v_reg<_Tp, n> c; + for( int i = 0; i < n; i++ ) + c.s[i] = a.s[i]*a.s[i] + b.s[i]*b.s[i]; + return c; +} + +/** @brief Multiply and add + + Returns \f$ a*b + c \f$ + For floating point types and signed 32bit int only. */ +template +inline v_reg<_Tp, n> v_fma(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b, + const v_reg<_Tp, n>& c) +{ + v_reg<_Tp, n> d; + for( int i = 0; i < n; i++ ) + d.s[i] = a.s[i]*b.s[i] + c.s[i]; + return d; +} + +/** @brief A synonym for v_fma */ +template +inline v_reg<_Tp, n> v_muladd(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b, + const v_reg<_Tp, n>& c) +{ + return v_fma(a, b, c); +} + +/** @brief Dot product of elements + +Multiply values in two registers and sum adjacent result pairs. + +Scheme: +@code + {A1 A2 ...} // 16-bit +x {B1 B2 ...} // 16-bit +------------- +{A1B1+A2B2 ...} // 32-bit + +@endcode +*/ +template inline v_reg::w_type, n/2> +v_dotprod(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) +{ + typedef typename V_TypeTraits<_Tp>::w_type w_type; + v_reg c; + for( int i = 0; i < (n/2); i++ ) + c.s[i] = (w_type)a.s[i*2]*b.s[i*2] + (w_type)a.s[i*2+1]*b.s[i*2+1]; + return c; +} + +/** @brief Dot product of elements + +Same as cv::v_dotprod, but add a third element to the sum of adjacent pairs. +Scheme: +@code + {A1 A2 ...} // 16-bit +x {B1 B2 ...} // 16-bit +------------- + {A1B1+A2B2+C1 ...} // 32-bit +@endcode +*/ +template inline v_reg::w_type, n/2> +v_dotprod(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b, + const v_reg::w_type, n / 2>& c) +{ + typedef typename V_TypeTraits<_Tp>::w_type w_type; + v_reg s; + for( int i = 0; i < (n/2); i++ ) + s.s[i] = (w_type)a.s[i*2]*b.s[i*2] + (w_type)a.s[i*2+1]*b.s[i*2+1] + c.s[i]; + return s; +} + +/** @brief Fast Dot product of elements + +Same as cv::v_dotprod, but it may perform unorder sum between result pairs in some platforms, +this intrinsic can be used if the sum among all lanes is only matters +and also it should be yielding better performance on the affected platforms. + +*/ +template inline v_reg::w_type, n/2> +v_dotprod_fast(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) +{ return v_dotprod(a, b); } + +/** @brief Fast Dot product of elements + +Same as cv::v_dotprod_fast, but add a third element to the sum of adjacent pairs. +*/ +template inline v_reg::w_type, n/2> +v_dotprod_fast(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b, + const v_reg::w_type, n / 2>& c) +{ return v_dotprod(a, b, c); } + +/** @brief Dot product of elements and expand + +Multiply values in two registers and expand the sum of adjacent result pairs. + +Scheme: +@code + {A1 A2 A3 A4 ...} // 8-bit +x {B1 B2 B3 B4 ...} // 8-bit +------------- + {A1B1+A2B2+A3B3+A4B4 ...} // 32-bit + +@endcode +*/ +template inline v_reg::q_type, n/4> +v_dotprod_expand(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) +{ + typedef typename V_TypeTraits<_Tp>::q_type q_type; + v_reg s; + for( int i = 0; i < (n/4); i++ ) + s.s[i] = (q_type)a.s[i*4 ]*b.s[i*4 ] + (q_type)a.s[i*4 + 1]*b.s[i*4 + 1] + + (q_type)a.s[i*4 + 2]*b.s[i*4 + 2] + (q_type)a.s[i*4 + 3]*b.s[i*4 + 3]; + return s; +} + +/** @brief Dot product of elements + +Same as cv::v_dotprod_expand, but add a third element to the sum of adjacent pairs. +Scheme: +@code + {A1 A2 A3 A4 ...} // 8-bit +x {B1 B2 B3 B4 ...} // 8-bit +------------- + {A1B1+A2B2+A3B3+A4B4+C1 ...} // 32-bit +@endcode +*/ +template inline v_reg::q_type, n/4> +v_dotprod_expand(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b, + const v_reg::q_type, n / 4>& c) +{ + typedef typename V_TypeTraits<_Tp>::q_type q_type; + v_reg s; + for( int i = 0; i < (n/4); i++ ) + s.s[i] = (q_type)a.s[i*4 ]*b.s[i*4 ] + (q_type)a.s[i*4 + 1]*b.s[i*4 + 1] + + (q_type)a.s[i*4 + 2]*b.s[i*4 + 2] + (q_type)a.s[i*4 + 3]*b.s[i*4 + 3] + c.s[i]; + return s; +} + +/** @brief Fast Dot product of elements and expand + +Multiply values in two registers and expand the sum of adjacent result pairs. + +Same as cv::v_dotprod_expand, but it may perform unorder sum between result pairs in some platforms, +this intrinsic can be used if the sum among all lanes is only matters +and also it should be yielding better performance on the affected platforms. + +*/ +template inline v_reg::q_type, n/4> +v_dotprod_expand_fast(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) +{ return v_dotprod_expand(a, b); } + +/** @brief Fast Dot product of elements + +Same as cv::v_dotprod_expand_fast, but add a third element to the sum of adjacent pairs. +*/ +template inline v_reg::q_type, n/4> +v_dotprod_expand_fast(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b, + const v_reg::q_type, n / 4>& c) +{ return v_dotprod_expand(a, b, c); } + +/** @brief Multiply and expand + +Multiply values two registers and store results in two registers with wider pack type. +Scheme: +@code + {A B C D} // 32-bit +x {E F G H} // 32-bit +--------------- +{AE BF} // 64-bit + {CG DH} // 64-bit +@endcode +Example: +@code{.cpp} +v_uint32x4 a, b; // {1,2,3,4} and {2,2,2,2} +v_uint64x2 c, d; // results +v_mul_expand(a, b, c, d); // c, d = {2,4}, {6, 8} +@endcode +Implemented only for 16- and unsigned 32-bit source types (v_int16x8, v_uint16x8, v_uint32x4). +*/ +template inline void v_mul_expand(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b, + v_reg::w_type, n/2>& c, + v_reg::w_type, n/2>& d) +{ + typedef typename V_TypeTraits<_Tp>::w_type w_type; + for( int i = 0; i < (n/2); i++ ) + { + c.s[i] = (w_type)a.s[i]*b.s[i]; + d.s[i] = (w_type)a.s[i+(n/2)]*b.s[i+(n/2)]; + } +} + +/** @brief Multiply and extract high part + +Multiply values two registers and store high part of the results. +Implemented only for 16-bit source types (v_int16x8, v_uint16x8). Returns \f$ a*b >> 16 \f$ +*/ +template inline v_reg<_Tp, n> v_mul_hi(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) +{ + typedef typename V_TypeTraits<_Tp>::w_type w_type; + v_reg<_Tp, n> c; + for (int i = 0; i < n; i++) + c.s[i] = (_Tp)(((w_type)a.s[i] * b.s[i]) >> sizeof(_Tp)*8); + return c; +} + +//! @cond IGNORED +template inline void v_hsum(const v_reg<_Tp, n>& a, + v_reg::w_type, n/2>& c) +{ + typedef typename V_TypeTraits<_Tp>::w_type w_type; + for( int i = 0; i < (n/2); i++ ) + { + c.s[i] = (w_type)a.s[i*2] + a.s[i*2+1]; + } +} +//! @endcond + +//! @brief Helper macro +//! @ingroup core_hal_intrin_impl +#define OPENCV_HAL_IMPL_SHIFT_OP(shift_op) \ +template inline v_reg<_Tp, n> operator shift_op(const v_reg<_Tp, n>& a, int imm) \ +{ \ + v_reg<_Tp, n> c; \ + for( int i = 0; i < n; i++ ) \ + c.s[i] = (_Tp)(a.s[i] shift_op imm); \ + return c; \ +} + +/** @brief Bitwise shift left + +For 16-, 32- and 64-bit integer values. */ +OPENCV_HAL_IMPL_SHIFT_OP(<< ) + +/** @brief Bitwise shift right + +For 16-, 32- and 64-bit integer values. */ +OPENCV_HAL_IMPL_SHIFT_OP(>> ) + +//! @brief Helper macro +//! @ingroup core_hal_intrin_impl +#define OPENCV_HAL_IMPL_ROTATE_SHIFT_OP(suffix,opA,opB) \ +template inline v_reg<_Tp, n> v_rotate_##suffix(const v_reg<_Tp, n>& a) \ +{ \ + v_reg<_Tp, n> b; \ + for (int i = 0; i < n; i++) \ + { \ + int sIndex = i opA imm; \ + if (0 <= sIndex && sIndex < n) \ + { \ + b.s[i] = a.s[sIndex]; \ + } \ + else \ + { \ + b.s[i] = 0; \ + } \ + } \ + return b; \ +} \ +template inline v_reg<_Tp, n> v_rotate_##suffix(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \ +{ \ + v_reg<_Tp, n> c; \ + for (int i = 0; i < n; i++) \ + { \ + int aIndex = i opA imm; \ + int bIndex = i opA imm opB n; \ + if (0 <= bIndex && bIndex < n) \ + { \ + c.s[i] = b.s[bIndex]; \ + } \ + else if (0 <= aIndex && aIndex < n) \ + { \ + c.s[i] = a.s[aIndex]; \ + } \ + else \ + { \ + c.s[i] = 0; \ + } \ + } \ + return c; \ +} + +/** @brief Element shift left among vector + +For all type */ +OPENCV_HAL_IMPL_ROTATE_SHIFT_OP(left, -, +) + +/** @brief Element shift right among vector + +For all type */ +OPENCV_HAL_IMPL_ROTATE_SHIFT_OP(right, +, -) + +/** @brief Sum packed values + +Scheme: +@code +{A1 A2 A3 ...} => sum{A1,A2,A3,...} +@endcode +*/ +template inline typename V_TypeTraits<_Tp>::sum_type v_reduce_sum(const v_reg<_Tp, n>& a) +{ + typename V_TypeTraits<_Tp>::sum_type c = a.s[0]; + for( int i = 1; i < n; i++ ) + c += a.s[i]; + return c; +} + +/** @brief Sums all elements of each input vector, returns the vector of sums + + Scheme: + @code + result[0] = a[0] + a[1] + a[2] + a[3] + result[1] = b[0] + b[1] + b[2] + b[3] + result[2] = c[0] + c[1] + c[2] + c[3] + result[3] = d[0] + d[1] + d[2] + d[3] + @endcode +*/ +inline v_float32x4 v_reduce_sum4(const v_float32x4& a, const v_float32x4& b, + const v_float32x4& c, const v_float32x4& d) +{ + v_float32x4 r; + r.s[0] = a.s[0] + a.s[1] + a.s[2] + a.s[3]; + r.s[1] = b.s[0] + b.s[1] + b.s[2] + b.s[3]; + r.s[2] = c.s[0] + c.s[1] + c.s[2] + c.s[3]; + r.s[3] = d.s[0] + d.s[1] + d.s[2] + d.s[3]; + return r; +} + +/** @brief Sum absolute differences of values + +Scheme: +@code +{A1 A2 A3 ...} {B1 B2 B3 ...} => sum{ABS(A1-B1),abs(A2-B2),abs(A3-B3),...} +@endcode +For all types except 64-bit types.*/ +template inline typename V_TypeTraits< typename V_TypeTraits<_Tp>::abs_type >::sum_type v_reduce_sad(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) +{ + typename V_TypeTraits< typename V_TypeTraits<_Tp>::abs_type >::sum_type c = _absdiff(a.s[0], b.s[0]); + for (int i = 1; i < n; i++) + c += _absdiff(a.s[i], b.s[i]); + return c; +} + +/** @brief Get negative values mask +@deprecated v_signmask depends on a lane count heavily and therefore isn't universal enough + +Returned value is a bit mask with bits set to 1 on places corresponding to negative packed values indexes. +Example: +@code{.cpp} +v_int32x4 r; // set to {-1, -1, 1, 1} +int mask = v_signmask(r); // mask = 3 <== 00000000 00000000 00000000 00000011 +@endcode +*/ +template inline int v_signmask(const v_reg<_Tp, n>& a) +{ + int mask = 0; + for( int i = 0; i < n; i++ ) + mask |= (V_TypeTraits<_Tp>::reinterpret_int(a.s[i]) < 0) << i; + return mask; +} + +/** @brief Get first negative lane index + +Returned value is an index of first negative lane (undefined for input of all positive values) +Example: +@code{.cpp} +v_int32x4 r; // set to {0, 0, -1, -1} +int idx = v_heading_zeros(r); // idx = 2 +@endcode +*/ +template inline int v_scan_forward(const v_reg<_Tp, n>& a) +{ + for (int i = 0; i < n; i++) + if(V_TypeTraits<_Tp>::reinterpret_int(a.s[i]) < 0) + return i; + return 0; +} + +/** @brief Check if all packed values are less than zero + +Unsigned values will be casted to signed: `uchar 254 => char -2`. +*/ +template inline bool v_check_all(const v_reg<_Tp, n>& a) +{ + for( int i = 0; i < n; i++ ) + if( V_TypeTraits<_Tp>::reinterpret_int(a.s[i]) >= 0 ) + return false; + return true; +} + +/** @brief Check if any of packed values is less than zero + +Unsigned values will be casted to signed: `uchar 254 => char -2`. +*/ +template inline bool v_check_any(const v_reg<_Tp, n>& a) +{ + for( int i = 0; i < n; i++ ) + if( V_TypeTraits<_Tp>::reinterpret_int(a.s[i]) < 0 ) + return true; + return false; +} + +/** @brief Per-element select (blend operation) + +Return value will be built by combining values _a_ and _b_ using the following scheme: + result[i] = mask[i] ? a[i] : b[i]; + +@note: _mask_ element values are restricted to these values: +- 0: select element from _b_ +- 0xff/0xffff/etc: select element from _a_ +(fully compatible with bitwise-based operator) +*/ +template inline v_reg<_Tp, n> v_select(const v_reg<_Tp, n>& mask, + const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) +{ + typedef V_TypeTraits<_Tp> Traits; + typedef typename Traits::int_type int_type; + v_reg<_Tp, n> c; + for( int i = 0; i < n; i++ ) + { + int_type m = Traits::reinterpret_int(mask.s[i]); + CV_DbgAssert(m == 0 || m == (~(int_type)0)); // restrict mask values: 0 or 0xff/0xffff/etc + c.s[i] = m ? a.s[i] : b.s[i]; + } + return c; +} + +/** @brief Expand values to the wider pack type + +Copy contents of register to two registers with 2x wider pack type. +Scheme: +@code + int32x4 int64x2 int64x2 +{A B C D} ==> {A B} , {C D} +@endcode */ +template inline void v_expand(const v_reg<_Tp, n>& a, + v_reg::w_type, n/2>& b0, + v_reg::w_type, n/2>& b1) +{ + for( int i = 0; i < (n/2); i++ ) + { + b0.s[i] = a.s[i]; + b1.s[i] = a.s[i+(n/2)]; + } +} + +/** @brief Expand lower values to the wider pack type + +Same as cv::v_expand, but return lower half of the vector. + +Scheme: +@code + int32x4 int64x2 +{A B C D} ==> {A B} +@endcode */ +template +inline v_reg::w_type, n/2> +v_expand_low(const v_reg<_Tp, n>& a) +{ + v_reg::w_type, n/2> b; + for( int i = 0; i < (n/2); i++ ) + b.s[i] = a.s[i]; + return b; +} + +/** @brief Expand higher values to the wider pack type + +Same as cv::v_expand_low, but expand higher half of the vector instead. + +Scheme: +@code + int32x4 int64x2 +{A B C D} ==> {C D} +@endcode */ +template +inline v_reg::w_type, n/2> +v_expand_high(const v_reg<_Tp, n>& a) +{ + v_reg::w_type, n/2> b; + for( int i = 0; i < (n/2); i++ ) + b.s[i] = a.s[i+(n/2)]; + return b; +} + +//! @cond IGNORED +template inline v_reg::int_type, n> + v_reinterpret_as_int(const v_reg<_Tp, n>& a) +{ + v_reg::int_type, n> c; + for( int i = 0; i < n; i++ ) + c.s[i] = V_TypeTraits<_Tp>::reinterpret_int(a.s[i]); + return c; +} + +template inline v_reg::uint_type, n> + v_reinterpret_as_uint(const v_reg<_Tp, n>& a) +{ + v_reg::uint_type, n> c; + for( int i = 0; i < n; i++ ) + c.s[i] = V_TypeTraits<_Tp>::reinterpret_uint(a.s[i]); + return c; +} +//! @endcond + +/** @brief Interleave two vectors + +Scheme: +@code + {A1 A2 A3 A4} + {B1 B2 B3 B4} +--------------- + {A1 B1 A2 B2} and {A3 B3 A4 B4} +@endcode +For all types except 64-bit. +*/ +template inline void v_zip( const v_reg<_Tp, n>& a0, const v_reg<_Tp, n>& a1, + v_reg<_Tp, n>& b0, v_reg<_Tp, n>& b1 ) +{ + int i; + for( i = 0; i < n/2; i++ ) + { + b0.s[i*2] = a0.s[i]; + b0.s[i*2+1] = a1.s[i]; + } + for( ; i < n; i++ ) + { + b1.s[i*2-n] = a0.s[i]; + b1.s[i*2-n+1] = a1.s[i]; + } +} + +/** @brief Load register contents from memory + +@param ptr pointer to memory block with data +@return register object + +@note Returned type will be detected from passed pointer type, for example uchar ==> cv::v_uint8x16, int ==> cv::v_int32x4, etc. + +@note Alignment requirement: +if CV_STRONG_ALIGNMENT=1 then passed pointer must be aligned (`sizeof(lane type)` should be enough). +Do not cast pointer types without runtime check for pointer alignment (like `uchar*` => `int*`). + */ +template +inline v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> v_load(const _Tp* ptr) +{ +#if CV_STRONG_ALIGNMENT + CV_Assert(isAligned(ptr)); +#endif + return v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128>(ptr); +} + +/** @brief Load register contents from memory (aligned) + +similar to cv::v_load, but source memory block should be aligned (to 16-byte boundary in case of SIMD128, 32-byte - SIMD256, etc) + */ +template +inline v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> v_load_aligned(const _Tp* ptr) +{ + CV_Assert(isAligned::nlanes128>)>(ptr)); + return v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128>(ptr); +} + +/** @brief Load 64-bits of data to lower part (high part is undefined). + +@param ptr memory block containing data for first half (0..n/2) + +@code{.cpp} +int lo[2] = { 1, 2 }; +v_int32x4 r = v_load_low(lo); +@endcode + */ +template +inline v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> v_load_low(const _Tp* ptr) +{ +#if CV_STRONG_ALIGNMENT + CV_Assert(isAligned(ptr)); +#endif + v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> c; + for( int i = 0; i < c.nlanes/2; i++ ) + { + c.s[i] = ptr[i]; + } + return c; +} + +/** @brief Load register contents from two memory blocks + +@param loptr memory block containing data for first half (0..n/2) +@param hiptr memory block containing data for second half (n/2..n) + +@code{.cpp} +int lo[2] = { 1, 2 }, hi[2] = { 3, 4 }; +v_int32x4 r = v_load_halves(lo, hi); +@endcode + */ +template +inline v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> v_load_halves(const _Tp* loptr, const _Tp* hiptr) +{ +#if CV_STRONG_ALIGNMENT + CV_Assert(isAligned(loptr)); + CV_Assert(isAligned(hiptr)); +#endif + v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> c; + for( int i = 0; i < c.nlanes/2; i++ ) + { + c.s[i] = loptr[i]; + c.s[i+c.nlanes/2] = hiptr[i]; + } + return c; +} + +/** @brief Load register contents from memory with double expand + +Same as cv::v_load, but result pack type will be 2x wider than memory type. + +@code{.cpp} +short buf[4] = {1, 2, 3, 4}; // type is int16 +v_int32x4 r = v_load_expand(buf); // r = {1, 2, 3, 4} - type is int32 +@endcode +For 8-, 16-, 32-bit integer source types. */ +template +inline v_reg::w_type, V_TypeTraits<_Tp>::nlanes128 / 2> +v_load_expand(const _Tp* ptr) +{ +#if CV_STRONG_ALIGNMENT + CV_Assert(isAligned(ptr)); +#endif + typedef typename V_TypeTraits<_Tp>::w_type w_type; + v_reg::nlanes128> c; + for( int i = 0; i < c.nlanes; i++ ) + { + c.s[i] = ptr[i]; + } + return c; +} + +/** @brief Load register contents from memory with quad expand + +Same as cv::v_load_expand, but result type is 4 times wider than source. +@code{.cpp} +char buf[4] = {1, 2, 3, 4}; // type is int8 +v_int32x4 r = v_load_q(buf); // r = {1, 2, 3, 4} - type is int32 +@endcode +For 8-bit integer source types. */ +template +inline v_reg::q_type, V_TypeTraits<_Tp>::nlanes128 / 4> +v_load_expand_q(const _Tp* ptr) +{ +#if CV_STRONG_ALIGNMENT + CV_Assert(isAligned(ptr)); +#endif + typedef typename V_TypeTraits<_Tp>::q_type q_type; + v_reg::nlanes128> c; + for( int i = 0; i < c.nlanes; i++ ) + { + c.s[i] = ptr[i]; + } + return c; +} + +/** @brief Load and deinterleave (2 channels) + +Load data from memory deinterleave and store to 2 registers. +Scheme: +@code +{A1 B1 A2 B2 ...} ==> {A1 A2 ...}, {B1 B2 ...} +@endcode +For all types except 64-bit. */ +template inline void v_load_deinterleave(const _Tp* ptr, v_reg<_Tp, n>& a, + v_reg<_Tp, n>& b) +{ +#if CV_STRONG_ALIGNMENT + CV_Assert(isAligned(ptr)); +#endif + int i, i2; + for( i = i2 = 0; i < n; i++, i2 += 2 ) + { + a.s[i] = ptr[i2]; + b.s[i] = ptr[i2+1]; + } +} + +/** @brief Load and deinterleave (3 channels) + +Load data from memory deinterleave and store to 3 registers. +Scheme: +@code +{A1 B1 C1 A2 B2 C2 ...} ==> {A1 A2 ...}, {B1 B2 ...}, {C1 C2 ...} +@endcode +For all types except 64-bit. */ +template inline void v_load_deinterleave(const _Tp* ptr, v_reg<_Tp, n>& a, + v_reg<_Tp, n>& b, v_reg<_Tp, n>& c) +{ +#if CV_STRONG_ALIGNMENT + CV_Assert(isAligned(ptr)); +#endif + int i, i3; + for( i = i3 = 0; i < n; i++, i3 += 3 ) + { + a.s[i] = ptr[i3]; + b.s[i] = ptr[i3+1]; + c.s[i] = ptr[i3+2]; + } +} + +/** @brief Load and deinterleave (4 channels) + +Load data from memory deinterleave and store to 4 registers. +Scheme: +@code +{A1 B1 C1 D1 A2 B2 C2 D2 ...} ==> {A1 A2 ...}, {B1 B2 ...}, {C1 C2 ...}, {D1 D2 ...} +@endcode +For all types except 64-bit. */ +template +inline void v_load_deinterleave(const _Tp* ptr, v_reg<_Tp, n>& a, + v_reg<_Tp, n>& b, v_reg<_Tp, n>& c, + v_reg<_Tp, n>& d) +{ +#if CV_STRONG_ALIGNMENT + CV_Assert(isAligned(ptr)); +#endif + int i, i4; + for( i = i4 = 0; i < n; i++, i4 += 4 ) + { + a.s[i] = ptr[i4]; + b.s[i] = ptr[i4+1]; + c.s[i] = ptr[i4+2]; + d.s[i] = ptr[i4+3]; + } +} + +/** @brief Interleave and store (2 channels) + +Interleave and store data from 2 registers to memory. +Scheme: +@code +{A1 A2 ...}, {B1 B2 ...} ==> {A1 B1 A2 B2 ...} +@endcode +For all types except 64-bit. */ +template +inline void v_store_interleave( _Tp* ptr, const v_reg<_Tp, n>& a, + const v_reg<_Tp, n>& b, + hal::StoreMode /*mode*/=hal::STORE_UNALIGNED) +{ +#if CV_STRONG_ALIGNMENT + CV_Assert(isAligned(ptr)); +#endif + int i, i2; + for( i = i2 = 0; i < n; i++, i2 += 2 ) + { + ptr[i2] = a.s[i]; + ptr[i2+1] = b.s[i]; + } +} + +/** @brief Interleave and store (3 channels) + +Interleave and store data from 3 registers to memory. +Scheme: +@code +{A1 A2 ...}, {B1 B2 ...}, {C1 C2 ...} ==> {A1 B1 C1 A2 B2 C2 ...} +@endcode +For all types except 64-bit. */ +template +inline void v_store_interleave( _Tp* ptr, const v_reg<_Tp, n>& a, + const v_reg<_Tp, n>& b, const v_reg<_Tp, n>& c, + hal::StoreMode /*mode*/=hal::STORE_UNALIGNED) +{ +#if CV_STRONG_ALIGNMENT + CV_Assert(isAligned(ptr)); +#endif + int i, i3; + for( i = i3 = 0; i < n; i++, i3 += 3 ) + { + ptr[i3] = a.s[i]; + ptr[i3+1] = b.s[i]; + ptr[i3+2] = c.s[i]; + } +} + +/** @brief Interleave and store (4 channels) + +Interleave and store data from 4 registers to memory. +Scheme: +@code +{A1 A2 ...}, {B1 B2 ...}, {C1 C2 ...}, {D1 D2 ...} ==> {A1 B1 C1 D1 A2 B2 C2 D2 ...} +@endcode +For all types except 64-bit. */ +template inline void v_store_interleave( _Tp* ptr, const v_reg<_Tp, n>& a, + const v_reg<_Tp, n>& b, const v_reg<_Tp, n>& c, + const v_reg<_Tp, n>& d, + hal::StoreMode /*mode*/=hal::STORE_UNALIGNED) +{ +#if CV_STRONG_ALIGNMENT + CV_Assert(isAligned(ptr)); +#endif + int i, i4; + for( i = i4 = 0; i < n; i++, i4 += 4 ) + { + ptr[i4] = a.s[i]; + ptr[i4+1] = b.s[i]; + ptr[i4+2] = c.s[i]; + ptr[i4+3] = d.s[i]; + } +} + +/** @brief Store data to memory + +Store register contents to memory. +Scheme: +@code + REG {A B C D} ==> MEM {A B C D} +@endcode +Pointer can be unaligned. */ +template +inline void v_store(_Tp* ptr, const v_reg<_Tp, n>& a) +{ +#if CV_STRONG_ALIGNMENT + CV_Assert(isAligned(ptr)); +#endif + for( int i = 0; i < n; i++ ) + ptr[i] = a.s[i]; +} + +template +inline void v_store(_Tp* ptr, const v_reg<_Tp, n>& a, hal::StoreMode /*mode*/) +{ +#if CV_STRONG_ALIGNMENT + CV_Assert(isAligned(ptr)); +#endif + v_store(ptr, a); +} + +/** @brief Store data to memory (lower half) + +Store lower half of register contents to memory. +Scheme: +@code + REG {A B C D} ==> MEM {A B} +@endcode */ +template +inline void v_store_low(_Tp* ptr, const v_reg<_Tp, n>& a) +{ +#if CV_STRONG_ALIGNMENT + CV_Assert(isAligned(ptr)); +#endif + for( int i = 0; i < (n/2); i++ ) + ptr[i] = a.s[i]; +} + +/** @brief Store data to memory (higher half) + +Store higher half of register contents to memory. +Scheme: +@code + REG {A B C D} ==> MEM {C D} +@endcode */ +template +inline void v_store_high(_Tp* ptr, const v_reg<_Tp, n>& a) +{ +#if CV_STRONG_ALIGNMENT + CV_Assert(isAligned(ptr)); +#endif + for( int i = 0; i < (n/2); i++ ) + ptr[i] = a.s[i+(n/2)]; +} + +/** @brief Store data to memory (aligned) + +Store register contents to memory. +Scheme: +@code + REG {A B C D} ==> MEM {A B C D} +@endcode +Pointer __should__ be aligned by 16-byte boundary. */ +template +inline void v_store_aligned(_Tp* ptr, const v_reg<_Tp, n>& a) +{ + CV_Assert(isAligned)>(ptr)); + v_store(ptr, a); +} + +template +inline void v_store_aligned_nocache(_Tp* ptr, const v_reg<_Tp, n>& a) +{ + CV_Assert(isAligned)>(ptr)); + v_store(ptr, a); +} + +template +inline void v_store_aligned(_Tp* ptr, const v_reg<_Tp, n>& a, hal::StoreMode /*mode*/) +{ + CV_Assert(isAligned)>(ptr)); + v_store(ptr, a); +} + +/** @brief Combine vector from first elements of two vectors + +Scheme: +@code + {A1 A2 A3 A4} + {B1 B2 B3 B4} +--------------- + {A1 A2 B1 B2} +@endcode +For all types except 64-bit. */ +template +inline v_reg<_Tp, n> v_combine_low(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) +{ + v_reg<_Tp, n> c; + for( int i = 0; i < (n/2); i++ ) + { + c.s[i] = a.s[i]; + c.s[i+(n/2)] = b.s[i]; + } + return c; +} + +/** @brief Combine vector from last elements of two vectors + +Scheme: +@code + {A1 A2 A3 A4} + {B1 B2 B3 B4} +--------------- + {A3 A4 B3 B4} +@endcode +For all types except 64-bit. */ +template +inline v_reg<_Tp, n> v_combine_high(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) +{ + v_reg<_Tp, n> c; + for( int i = 0; i < (n/2); i++ ) + { + c.s[i] = a.s[i+(n/2)]; + c.s[i+(n/2)] = b.s[i+(n/2)]; + } + return c; +} + +/** @brief Combine two vectors from lower and higher parts of two other vectors + +@code{.cpp} +low = cv::v_combine_low(a, b); +high = cv::v_combine_high(a, b); +@endcode */ +template +inline void v_recombine(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b, + v_reg<_Tp, n>& low, v_reg<_Tp, n>& high) +{ + for( int i = 0; i < (n/2); i++ ) + { + low.s[i] = a.s[i]; + low.s[i+(n/2)] = b.s[i]; + high.s[i] = a.s[i+(n/2)]; + high.s[i+(n/2)] = b.s[i+(n/2)]; + } +} + +/** @brief Vector reverse order + +Reverse the order of the vector +Scheme: +@code + REG {A1 ... An} ==> REG {An ... A1} +@endcode +For all types. */ +template +inline v_reg<_Tp, n> v_reverse(const v_reg<_Tp, n>& a) +{ + v_reg<_Tp, n> c; + for( int i = 0; i < n; i++ ) + c.s[i] = a.s[n-i-1]; + return c; +} + +/** @brief Vector extract + +Scheme: +@code + {A1 A2 A3 A4} + {B1 B2 B3 B4} +======================== +shift = 1 {A2 A3 A4 B1} +shift = 2 {A3 A4 B1 B2} +shift = 3 {A4 B1 B2 B3} +@endcode +Restriction: 0 <= shift < nlanes + +Usage: +@code +v_int32x4 a, b, c; +c = v_extract<2>(a, b); +@endcode +For all types. */ +template +inline v_reg<_Tp, n> v_extract(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) +{ + v_reg<_Tp, n> r; + const int shift = n - s; + int i = 0; + for (; i < shift; ++i) + r.s[i] = a.s[i+s]; + for (; i < n; ++i) + r.s[i] = b.s[i-shift]; + return r; +} + +/** @brief Vector extract + +Scheme: +Return the s-th element of v. +Restriction: 0 <= s < nlanes + +Usage: +@code +v_int32x4 a; +int r; +r = v_extract_n<2>(a); +@endcode +For all types. */ +template +inline _Tp v_extract_n(const v_reg<_Tp, n>& v) +{ + CV_DbgAssert(s >= 0 && s < n); + return v.s[s]; +} + +/** @brief Broadcast i-th element of vector + +Scheme: +@code +{ v[0] v[1] v[2] ... v[SZ] } => { v[i], v[i], v[i] ... v[i] } +@endcode +Restriction: 0 <= i < nlanes +Supported types: 32-bit integers and floats (s32/u32/f32) + */ +template +inline v_reg<_Tp, n> v_broadcast_element(const v_reg<_Tp, n>& a) +{ + CV_DbgAssert(i >= 0 && i < n); + return v_reg<_Tp, n>::all(a.s[i]); +} + +/** @brief Round + +Rounds each value. Input type is float vector ==> output type is int vector.*/ +template inline v_reg v_round(const v_reg& a) +{ + v_reg c; + for( int i = 0; i < n; i++ ) + c.s[i] = cvRound(a.s[i]); + return c; +} + +/** @overload */ +template inline v_reg v_round(const v_reg& a, const v_reg& b) +{ + v_reg c; + for( int i = 0; i < n; i++ ) + { + c.s[i] = cvRound(a.s[i]); + c.s[i+n] = cvRound(b.s[i]); + } + return c; +} + +/** @brief Floor + +Floor each value. Input type is float vector ==> output type is int vector.*/ +template inline v_reg v_floor(const v_reg& a) +{ + v_reg c; + for( int i = 0; i < n; i++ ) + c.s[i] = cvFloor(a.s[i]); + return c; +} + +/** @brief Ceil + +Ceil each value. Input type is float vector ==> output type is int vector.*/ +template inline v_reg v_ceil(const v_reg& a) +{ + v_reg c; + for( int i = 0; i < n; i++ ) + c.s[i] = cvCeil(a.s[i]); + return c; +} + +/** @brief Trunc + +Truncate each value. Input type is float vector ==> output type is int vector.*/ +template inline v_reg v_trunc(const v_reg& a) +{ + v_reg c; + for( int i = 0; i < n; i++ ) + c.s[i] = (int)(a.s[i]); + return c; +} + +/** @overload */ +template inline v_reg v_round(const v_reg& a) +{ + v_reg c; + for( int i = 0; i < n; i++ ) + { + c.s[i] = cvRound(a.s[i]); + c.s[i+n] = 0; + } + return c; +} + +/** @overload */ +template inline v_reg v_floor(const v_reg& a) +{ + v_reg c; + for( int i = 0; i < n; i++ ) + { + c.s[i] = cvFloor(a.s[i]); + c.s[i+n] = 0; + } + return c; +} + +/** @overload */ +template inline v_reg v_ceil(const v_reg& a) +{ + v_reg c; + for( int i = 0; i < n; i++ ) + { + c.s[i] = cvCeil(a.s[i]); + c.s[i+n] = 0; + } + return c; +} + +/** @overload */ +template inline v_reg v_trunc(const v_reg& a) +{ + v_reg c; + for( int i = 0; i < n; i++ ) + { + c.s[i] = cvCeil(a.s[i]); + c.s[i+n] = 0; + } + return c; +} + +/** @brief Convert to float + +Supported input type is cv::v_int32x4. */ +template inline v_reg v_cvt_f32(const v_reg& a) +{ + v_reg c; + for( int i = 0; i < n; i++ ) + c.s[i] = (float)a.s[i]; + return c; +} + +template inline v_reg v_cvt_f32(const v_reg& a) +{ + v_reg c; + for( int i = 0; i < n; i++ ) + { + c.s[i] = (float)a.s[i]; + c.s[i+n] = 0; + } + return c; +} + +template inline v_reg v_cvt_f32(const v_reg& a, const v_reg& b) +{ + v_reg c; + for( int i = 0; i < n; i++ ) + { + c.s[i] = (float)a.s[i]; + c.s[i+n] = (float)b.s[i]; + } + return c; +} + +/** @brief Convert to double + +Supported input type is cv::v_int32x4. */ +CV_INLINE v_reg v_cvt_f64(const v_reg& a) +{ + enum { n = 2 }; + v_reg c; + for( int i = 0; i < n; i++ ) + c.s[i] = (double)a.s[i]; + return c; +} + +/** @brief Convert to double high part of vector + +Supported input type is cv::v_int32x4. */ +CV_INLINE v_reg v_cvt_f64_high(const v_reg& a) +{ + enum { n = 2 }; + v_reg c; + for( int i = 0; i < n; i++ ) + c.s[i] = (double)a.s[i + 2]; + return c; +} + +/** @brief Convert to double + +Supported input type is cv::v_float32x4. */ +CV_INLINE v_reg v_cvt_f64(const v_reg& a) +{ + enum { n = 2 }; + v_reg c; + for( int i = 0; i < n; i++ ) + c.s[i] = (double)a.s[i]; + return c; +} + +/** @brief Convert to double high part of vector + +Supported input type is cv::v_float32x4. */ +CV_INLINE v_reg v_cvt_f64_high(const v_reg& a) +{ + enum { n = 2 }; + v_reg c; + for( int i = 0; i < n; i++ ) + c.s[i] = (double)a.s[i + 2]; + return c; +} + +/** @brief Convert to double + +Supported input type is cv::v_int64x2. */ +CV_INLINE v_reg v_cvt_f64(const v_reg& a) +{ + enum { n = 2 }; + v_reg c; + for( int i = 0; i < n; i++ ) + c.s[i] = (double)a.s[i]; + return c; +} + +/** @brief Convert to double high part of vector + +Supported input type is cv::v_int64x2. */ +CV_INLINE v_reg v_cvt_f64_high(const v_reg& a) +{ + enum { n = 2 }; + v_reg c; + for( int i = 0; i < n; i++ ) + c.s[i] = (double)a.s[i]; + return c; +} + + +template inline v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> v_lut(const _Tp* tab, const int* idx) +{ + v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> c; + for (int i = 0; i < V_TypeTraits<_Tp>::nlanes128; i++) + c.s[i] = tab[idx[i]]; + return c; +} +template inline v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> v_lut_pairs(const _Tp* tab, const int* idx) +{ + v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> c; + for (int i = 0; i < V_TypeTraits<_Tp>::nlanes128; i++) + c.s[i] = tab[idx[i / 2] + i % 2]; + return c; +} +template inline v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> v_lut_quads(const _Tp* tab, const int* idx) +{ + v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> c; + for (int i = 0; i < V_TypeTraits<_Tp>::nlanes128; i++) + c.s[i] = tab[idx[i / 4] + i % 4]; + return c; +} + +template inline v_reg v_lut(const int* tab, const v_reg& idx) +{ + v_reg c; + for( int i = 0; i < n; i++ ) + c.s[i] = tab[idx.s[i]]; + return c; +} + +template inline v_reg v_lut(const unsigned* tab, const v_reg& idx) +{ + v_reg c; + for (int i = 0; i < n; i++) + c.s[i] = tab[idx.s[i]]; + return c; +} + +template inline v_reg v_lut(const float* tab, const v_reg& idx) +{ + v_reg c; + for( int i = 0; i < n; i++ ) + c.s[i] = tab[idx.s[i]]; + return c; +} + +template inline v_reg v_lut(const double* tab, const v_reg& idx) +{ + v_reg c; + for( int i = 0; i < n; i++ ) + c.s[i] = tab[idx.s[i]]; + return c; +} + + +inline v_int32x4 v_lut(const int* tab, const v_int32x4& idxvec) +{ + return v_lut(tab, idxvec.s); +} + +inline v_uint32x4 v_lut(const unsigned* tab, const v_int32x4& idxvec) +{ + return v_lut(tab, idxvec.s); +} + +inline v_float32x4 v_lut(const float* tab, const v_int32x4& idxvec) +{ + return v_lut(tab, idxvec.s); +} + +inline v_float64x2 v_lut(const double* tab, const v_int32x4& idxvec) +{ + return v_lut(tab, idxvec.s); +} + + +template inline void v_lut_deinterleave(const float* tab, const v_reg& idx, + v_reg& x, v_reg& y) +{ + for( int i = 0; i < n; i++ ) + { + int j = idx.s[i]; + x.s[i] = tab[j]; + y.s[i] = tab[j+1]; + } +} + +template inline void v_lut_deinterleave(const double* tab, const v_reg& idx, + v_reg& x, v_reg& y) +{ + for( int i = 0; i < n; i++ ) + { + int j = idx.s[i]; + x.s[i] = tab[j]; + y.s[i] = tab[j+1]; + } +} + +template inline v_reg<_Tp, n> v_interleave_pairs(const v_reg<_Tp, n>& vec) +{ + v_reg<_Tp, n> c; + for (int i = 0; i < n/4; i++) + { + c.s[4*i ] = vec.s[4*i ]; + c.s[4*i+1] = vec.s[4*i+2]; + c.s[4*i+2] = vec.s[4*i+1]; + c.s[4*i+3] = vec.s[4*i+3]; + } + return c; +} + +template inline v_reg<_Tp, n> v_interleave_quads(const v_reg<_Tp, n>& vec) +{ + v_reg<_Tp, n> c; + for (int i = 0; i < n/8; i++) + { + c.s[8*i ] = vec.s[8*i ]; + c.s[8*i+1] = vec.s[8*i+4]; + c.s[8*i+2] = vec.s[8*i+1]; + c.s[8*i+3] = vec.s[8*i+5]; + c.s[8*i+4] = vec.s[8*i+2]; + c.s[8*i+5] = vec.s[8*i+6]; + c.s[8*i+6] = vec.s[8*i+3]; + c.s[8*i+7] = vec.s[8*i+7]; + } + return c; +} + +template inline v_reg<_Tp, n> v_pack_triplets(const v_reg<_Tp, n>& vec) +{ + v_reg<_Tp, n> c; + for (int i = 0; i < n/4; i++) + { + c.s[3*i ] = vec.s[4*i ]; + c.s[3*i+1] = vec.s[4*i+1]; + c.s[3*i+2] = vec.s[4*i+2]; + } + return c; +} + +/** @brief Transpose 4x4 matrix + +Scheme: +@code +a0 {A1 A2 A3 A4} +a1 {B1 B2 B3 B4} +a2 {C1 C2 C3 C4} +a3 {D1 D2 D3 D4} +=============== +b0 {A1 B1 C1 D1} +b1 {A2 B2 C2 D2} +b2 {A3 B3 C3 D3} +b3 {A4 B4 C4 D4} +@endcode +*/ +template +inline void v_transpose4x4( v_reg<_Tp, 4>& a0, const v_reg<_Tp, 4>& a1, + const v_reg<_Tp, 4>& a2, const v_reg<_Tp, 4>& a3, + v_reg<_Tp, 4>& b0, v_reg<_Tp, 4>& b1, + v_reg<_Tp, 4>& b2, v_reg<_Tp, 4>& b3 ) +{ + b0 = v_reg<_Tp, 4>(a0.s[0], a1.s[0], a2.s[0], a3.s[0]); + b1 = v_reg<_Tp, 4>(a0.s[1], a1.s[1], a2.s[1], a3.s[1]); + b2 = v_reg<_Tp, 4>(a0.s[2], a1.s[2], a2.s[2], a3.s[2]); + b3 = v_reg<_Tp, 4>(a0.s[3], a1.s[3], a2.s[3], a3.s[3]); +} + +//! @brief Helper macro +//! @ingroup core_hal_intrin_impl +#define OPENCV_HAL_IMPL_C_INIT_ZERO(_Tpvec, _Tp, suffix) \ +inline _Tpvec v_setzero_##suffix() { return _Tpvec::zero(); } + +//! @name Init with zero +//! @{ +//! @brief Create new vector with zero elements +OPENCV_HAL_IMPL_C_INIT_ZERO(v_uint8x16, uchar, u8) +OPENCV_HAL_IMPL_C_INIT_ZERO(v_int8x16, schar, s8) +OPENCV_HAL_IMPL_C_INIT_ZERO(v_uint16x8, ushort, u16) +OPENCV_HAL_IMPL_C_INIT_ZERO(v_int16x8, short, s16) +OPENCV_HAL_IMPL_C_INIT_ZERO(v_uint32x4, unsigned, u32) +OPENCV_HAL_IMPL_C_INIT_ZERO(v_int32x4, int, s32) +OPENCV_HAL_IMPL_C_INIT_ZERO(v_float32x4, float, f32) +OPENCV_HAL_IMPL_C_INIT_ZERO(v_float64x2, double, f64) +OPENCV_HAL_IMPL_C_INIT_ZERO(v_uint64x2, uint64, u64) +OPENCV_HAL_IMPL_C_INIT_ZERO(v_int64x2, int64, s64) +//! @} + +//! @brief Helper macro +//! @ingroup core_hal_intrin_impl +#define OPENCV_HAL_IMPL_C_INIT_VAL(_Tpvec, _Tp, suffix) \ +inline _Tpvec v_setall_##suffix(_Tp val) { return _Tpvec::all(val); } + +//! @name Init with value +//! @{ +//! @brief Create new vector with elements set to a specific value +OPENCV_HAL_IMPL_C_INIT_VAL(v_uint8x16, uchar, u8) +OPENCV_HAL_IMPL_C_INIT_VAL(v_int8x16, schar, s8) +OPENCV_HAL_IMPL_C_INIT_VAL(v_uint16x8, ushort, u16) +OPENCV_HAL_IMPL_C_INIT_VAL(v_int16x8, short, s16) +OPENCV_HAL_IMPL_C_INIT_VAL(v_uint32x4, unsigned, u32) +OPENCV_HAL_IMPL_C_INIT_VAL(v_int32x4, int, s32) +OPENCV_HAL_IMPL_C_INIT_VAL(v_float32x4, float, f32) +OPENCV_HAL_IMPL_C_INIT_VAL(v_float64x2, double, f64) +OPENCV_HAL_IMPL_C_INIT_VAL(v_uint64x2, uint64, u64) +OPENCV_HAL_IMPL_C_INIT_VAL(v_int64x2, int64, s64) +//! @} + +//! @brief Helper macro +//! @ingroup core_hal_intrin_impl +#define OPENCV_HAL_IMPL_C_REINTERPRET(_Tpvec, _Tp, suffix) \ +template inline _Tpvec \ + v_reinterpret_as_##suffix(const v_reg<_Tp0, n0>& a) \ +{ return a.template reinterpret_as<_Tp, _Tpvec::nlanes>(); } + +//! @name Reinterpret +//! @{ +//! @brief Convert vector to different type without modifying underlying data. +OPENCV_HAL_IMPL_C_REINTERPRET(v_uint8x16, uchar, u8) +OPENCV_HAL_IMPL_C_REINTERPRET(v_int8x16, schar, s8) +OPENCV_HAL_IMPL_C_REINTERPRET(v_uint16x8, ushort, u16) +OPENCV_HAL_IMPL_C_REINTERPRET(v_int16x8, short, s16) +OPENCV_HAL_IMPL_C_REINTERPRET(v_uint32x4, unsigned, u32) +OPENCV_HAL_IMPL_C_REINTERPRET(v_int32x4, int, s32) +OPENCV_HAL_IMPL_C_REINTERPRET(v_float32x4, float, f32) +OPENCV_HAL_IMPL_C_REINTERPRET(v_float64x2, double, f64) +OPENCV_HAL_IMPL_C_REINTERPRET(v_uint64x2, uint64, u64) +OPENCV_HAL_IMPL_C_REINTERPRET(v_int64x2, int64, s64) +//! @} + +//! @brief Helper macro +//! @ingroup core_hal_intrin_impl +#define OPENCV_HAL_IMPL_C_SHIFTL(_Tpvec, _Tp) \ +template inline _Tpvec v_shl(const _Tpvec& a) \ +{ return a << n; } + +//! @name Left shift +//! @{ +//! @brief Shift left +OPENCV_HAL_IMPL_C_SHIFTL(v_uint16x8, ushort) +OPENCV_HAL_IMPL_C_SHIFTL(v_int16x8, short) +OPENCV_HAL_IMPL_C_SHIFTL(v_uint32x4, unsigned) +OPENCV_HAL_IMPL_C_SHIFTL(v_int32x4, int) +OPENCV_HAL_IMPL_C_SHIFTL(v_uint64x2, uint64) +OPENCV_HAL_IMPL_C_SHIFTL(v_int64x2, int64) +//! @} + +//! @brief Helper macro +//! @ingroup core_hal_intrin_impl +#define OPENCV_HAL_IMPL_C_SHIFTR(_Tpvec, _Tp) \ +template inline _Tpvec v_shr(const _Tpvec& a) \ +{ return a >> n; } + +//! @name Right shift +//! @{ +//! @brief Shift right +OPENCV_HAL_IMPL_C_SHIFTR(v_uint16x8, ushort) +OPENCV_HAL_IMPL_C_SHIFTR(v_int16x8, short) +OPENCV_HAL_IMPL_C_SHIFTR(v_uint32x4, unsigned) +OPENCV_HAL_IMPL_C_SHIFTR(v_int32x4, int) +OPENCV_HAL_IMPL_C_SHIFTR(v_uint64x2, uint64) +OPENCV_HAL_IMPL_C_SHIFTR(v_int64x2, int64) +//! @} + +//! @brief Helper macro +//! @ingroup core_hal_intrin_impl +#define OPENCV_HAL_IMPL_C_RSHIFTR(_Tpvec, _Tp) \ +template inline _Tpvec v_rshr(const _Tpvec& a) \ +{ \ + _Tpvec c; \ + for( int i = 0; i < _Tpvec::nlanes; i++ ) \ + c.s[i] = (_Tp)((a.s[i] + ((_Tp)1 << (n - 1))) >> n); \ + return c; \ +} + +//! @name Rounding shift +//! @{ +//! @brief Rounding shift right +OPENCV_HAL_IMPL_C_RSHIFTR(v_uint16x8, ushort) +OPENCV_HAL_IMPL_C_RSHIFTR(v_int16x8, short) +OPENCV_HAL_IMPL_C_RSHIFTR(v_uint32x4, unsigned) +OPENCV_HAL_IMPL_C_RSHIFTR(v_int32x4, int) +OPENCV_HAL_IMPL_C_RSHIFTR(v_uint64x2, uint64) +OPENCV_HAL_IMPL_C_RSHIFTR(v_int64x2, int64) +//! @} + +//! @brief Helper macro +//! @ingroup core_hal_intrin_impl +#define OPENCV_HAL_IMPL_C_PACK(_Tpvec, _Tpnvec, _Tpn, pack_suffix, cast) \ +inline _Tpnvec v_##pack_suffix(const _Tpvec& a, const _Tpvec& b) \ +{ \ + _Tpnvec c; \ + for( int i = 0; i < _Tpvec::nlanes; i++ ) \ + { \ + c.s[i] = cast<_Tpn>(a.s[i]); \ + c.s[i+_Tpvec::nlanes] = cast<_Tpn>(b.s[i]); \ + } \ + return c; \ +} + +//! @name Pack +//! @{ +//! @brief Pack values from two vectors to one +//! +//! Return vector type have twice more elements than input vector types. Variant with _u_ suffix also +//! converts to corresponding unsigned type. +//! +//! - pack: for 16-, 32- and 64-bit integer input types +//! - pack_u: for 16- and 32-bit signed integer input types +//! +//! @note All variants except 64-bit use saturation. +OPENCV_HAL_IMPL_C_PACK(v_uint16x8, v_uint8x16, uchar, pack, saturate_cast) +OPENCV_HAL_IMPL_C_PACK(v_int16x8, v_int8x16, schar, pack, saturate_cast) +OPENCV_HAL_IMPL_C_PACK(v_uint32x4, v_uint16x8, ushort, pack, saturate_cast) +OPENCV_HAL_IMPL_C_PACK(v_int32x4, v_int16x8, short, pack, saturate_cast) +OPENCV_HAL_IMPL_C_PACK(v_uint64x2, v_uint32x4, unsigned, pack, static_cast) +OPENCV_HAL_IMPL_C_PACK(v_int64x2, v_int32x4, int, pack, static_cast) +OPENCV_HAL_IMPL_C_PACK(v_int16x8, v_uint8x16, uchar, pack_u, saturate_cast) +OPENCV_HAL_IMPL_C_PACK(v_int32x4, v_uint16x8, ushort, pack_u, saturate_cast) +//! @} + +//! @brief Helper macro +//! @ingroup core_hal_intrin_impl +#define OPENCV_HAL_IMPL_C_RSHR_PACK(_Tpvec, _Tp, _Tpnvec, _Tpn, pack_suffix, cast) \ +template inline _Tpnvec v_rshr_##pack_suffix(const _Tpvec& a, const _Tpvec& b) \ +{ \ + _Tpnvec c; \ + for( int i = 0; i < _Tpvec::nlanes; i++ ) \ + { \ + c.s[i] = cast<_Tpn>((a.s[i] + ((_Tp)1 << (n - 1))) >> n); \ + c.s[i+_Tpvec::nlanes] = cast<_Tpn>((b.s[i] + ((_Tp)1 << (n - 1))) >> n); \ + } \ + return c; \ +} + +//! @name Pack with rounding shift +//! @{ +//! @brief Pack values from two vectors to one with rounding shift +//! +//! Values from the input vectors will be shifted right by _n_ bits with rounding, converted to narrower +//! type and returned in the result vector. Variant with _u_ suffix converts to unsigned type. +//! +//! - pack: for 16-, 32- and 64-bit integer input types +//! - pack_u: for 16- and 32-bit signed integer input types +//! +//! @note All variants except 64-bit use saturation. +OPENCV_HAL_IMPL_C_RSHR_PACK(v_uint16x8, ushort, v_uint8x16, uchar, pack, saturate_cast) +OPENCV_HAL_IMPL_C_RSHR_PACK(v_int16x8, short, v_int8x16, schar, pack, saturate_cast) +OPENCV_HAL_IMPL_C_RSHR_PACK(v_uint32x4, unsigned, v_uint16x8, ushort, pack, saturate_cast) +OPENCV_HAL_IMPL_C_RSHR_PACK(v_int32x4, int, v_int16x8, short, pack, saturate_cast) +OPENCV_HAL_IMPL_C_RSHR_PACK(v_uint64x2, uint64, v_uint32x4, unsigned, pack, static_cast) +OPENCV_HAL_IMPL_C_RSHR_PACK(v_int64x2, int64, v_int32x4, int, pack, static_cast) +OPENCV_HAL_IMPL_C_RSHR_PACK(v_int16x8, short, v_uint8x16, uchar, pack_u, saturate_cast) +OPENCV_HAL_IMPL_C_RSHR_PACK(v_int32x4, int, v_uint16x8, ushort, pack_u, saturate_cast) +//! @} + +//! @brief Helper macro +//! @ingroup core_hal_intrin_impl +#define OPENCV_HAL_IMPL_C_PACK_STORE(_Tpvec, _Tp, _Tpnvec, _Tpn, pack_suffix, cast) \ +inline void v_##pack_suffix##_store(_Tpn* ptr, const _Tpvec& a) \ +{ \ + for( int i = 0; i < _Tpvec::nlanes; i++ ) \ + ptr[i] = cast<_Tpn>(a.s[i]); \ +} + +//! @name Pack and store +//! @{ +//! @brief Store values from the input vector into memory with pack +//! +//! Values will be stored into memory with conversion to narrower type. +//! Variant with _u_ suffix converts to corresponding unsigned type. +//! +//! - pack: for 16-, 32- and 64-bit integer input types +//! - pack_u: for 16- and 32-bit signed integer input types +//! +//! @note All variants except 64-bit use saturation. +OPENCV_HAL_IMPL_C_PACK_STORE(v_uint16x8, ushort, v_uint8x16, uchar, pack, saturate_cast) +OPENCV_HAL_IMPL_C_PACK_STORE(v_int16x8, short, v_int8x16, schar, pack, saturate_cast) +OPENCV_HAL_IMPL_C_PACK_STORE(v_uint32x4, unsigned, v_uint16x8, ushort, pack, saturate_cast) +OPENCV_HAL_IMPL_C_PACK_STORE(v_int32x4, int, v_int16x8, short, pack, saturate_cast) +OPENCV_HAL_IMPL_C_PACK_STORE(v_uint64x2, uint64, v_uint32x4, unsigned, pack, static_cast) +OPENCV_HAL_IMPL_C_PACK_STORE(v_int64x2, int64, v_int32x4, int, pack, static_cast) +OPENCV_HAL_IMPL_C_PACK_STORE(v_int16x8, short, v_uint8x16, uchar, pack_u, saturate_cast) +OPENCV_HAL_IMPL_C_PACK_STORE(v_int32x4, int, v_uint16x8, ushort, pack_u, saturate_cast) +//! @} + +//! @brief Helper macro +//! @ingroup core_hal_intrin_impl +#define OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(_Tpvec, _Tp, _Tpnvec, _Tpn, pack_suffix, cast) \ +template inline void v_rshr_##pack_suffix##_store(_Tpn* ptr, const _Tpvec& a) \ +{ \ + for( int i = 0; i < _Tpvec::nlanes; i++ ) \ + ptr[i] = cast<_Tpn>((a.s[i] + ((_Tp)1 << (n - 1))) >> n); \ +} + +//! @name Pack and store with rounding shift +//! @{ +//! @brief Store values from the input vector into memory with pack +//! +//! Values will be shifted _n_ bits right with rounding, converted to narrower type and stored into +//! memory. Variant with _u_ suffix converts to unsigned type. +//! +//! - pack: for 16-, 32- and 64-bit integer input types +//! - pack_u: for 16- and 32-bit signed integer input types +//! +//! @note All variants except 64-bit use saturation. +OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_uint16x8, ushort, v_uint8x16, uchar, pack, saturate_cast) +OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_int16x8, short, v_int8x16, schar, pack, saturate_cast) +OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_uint32x4, unsigned, v_uint16x8, ushort, pack, saturate_cast) +OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_int32x4, int, v_int16x8, short, pack, saturate_cast) +OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_uint64x2, uint64, v_uint32x4, unsigned, pack, static_cast) +OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_int64x2, int64, v_int32x4, int, pack, static_cast) +OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_int16x8, short, v_uint8x16, uchar, pack_u, saturate_cast) +OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_int32x4, int, v_uint16x8, ushort, pack_u, saturate_cast) +//! @} + +//! @cond IGNORED +template +inline void _pack_b(_Tpm* mptr, const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) +{ + for (int i = 0; i < n; ++i) + { + mptr[i] = (_Tpm)a.s[i]; + mptr[i + n] = (_Tpm)b.s[i]; + } +} +//! @endcond + +//! @name Pack boolean values +//! @{ +//! @brief Pack boolean values from multiple vectors to one unsigned 8-bit integer vector +//! +//! @note Must provide valid boolean values to guarantee same result for all architectures. + +/** @brief +//! For 16-bit boolean values + +Scheme: +@code +a {0xFFFF 0 0 0xFFFF 0 0xFFFF 0xFFFF 0} +b {0xFFFF 0 0xFFFF 0 0 0xFFFF 0 0xFFFF} +=============== +{ + 0xFF 0 0 0xFF 0 0xFF 0xFF 0 + 0xFF 0 0xFF 0 0 0xFF 0 0xFF +} +@endcode */ + +inline v_uint8x16 v_pack_b(const v_uint16x8& a, const v_uint16x8& b) +{ + v_uint8x16 mask; + _pack_b(mask.s, a, b); + return mask; +} + +/** @overload +For 32-bit boolean values + +Scheme: +@code +a {0xFFFF.. 0 0 0xFFFF..} +b {0 0xFFFF.. 0xFFFF.. 0} +c {0xFFFF.. 0 0xFFFF.. 0} +d {0 0xFFFF.. 0 0xFFFF..} +=============== +{ + 0xFF 0 0 0xFF 0 0xFF 0xFF 0 + 0xFF 0 0xFF 0 0 0xFF 0 0xFF +} +@endcode */ + +inline v_uint8x16 v_pack_b(const v_uint32x4& a, const v_uint32x4& b, + const v_uint32x4& c, const v_uint32x4& d) +{ + v_uint8x16 mask; + _pack_b(mask.s, a, b); + _pack_b(mask.s + 8, c, d); + return mask; +} + +/** @overload +For 64-bit boolean values + +Scheme: +@code +a {0xFFFF.. 0} +b {0 0xFFFF..} +c {0xFFFF.. 0} +d {0 0xFFFF..} + +e {0xFFFF.. 0} +f {0xFFFF.. 0} +g {0 0xFFFF..} +h {0 0xFFFF..} +=============== +{ + 0xFF 0 0 0xFF 0xFF 0 0 0xFF + 0xFF 0 0xFF 0 0 0xFF 0 0xFF +} +@endcode */ +inline v_uint8x16 v_pack_b(const v_uint64x2& a, const v_uint64x2& b, const v_uint64x2& c, + const v_uint64x2& d, const v_uint64x2& e, const v_uint64x2& f, + const v_uint64x2& g, const v_uint64x2& h) +{ + v_uint8x16 mask; + _pack_b(mask.s, a, b); + _pack_b(mask.s + 4, c, d); + _pack_b(mask.s + 8, e, f); + _pack_b(mask.s + 12, g, h); + return mask; +} +//! @} + +/** @brief Matrix multiplication + +Scheme: +@code +{A0 A1 A2 A3} |V0| +{B0 B1 B2 B3} |V1| +{C0 C1 C2 C3} |V2| +{D0 D1 D2 D3} x |V3| +==================== +{R0 R1 R2 R3}, where: +R0 = A0V0 + A1V1 + A2V2 + A3V3, +R1 = B0V0 + B1V1 + B2V2 + B3V3 +... +@endcode +*/ +inline v_float32x4 v_matmul(const v_float32x4& v, const v_float32x4& m0, + const v_float32x4& m1, const v_float32x4& m2, + const v_float32x4& m3) +{ + return v_float32x4(v.s[0]*m0.s[0] + v.s[1]*m1.s[0] + v.s[2]*m2.s[0] + v.s[3]*m3.s[0], + v.s[0]*m0.s[1] + v.s[1]*m1.s[1] + v.s[2]*m2.s[1] + v.s[3]*m3.s[1], + v.s[0]*m0.s[2] + v.s[1]*m1.s[2] + v.s[2]*m2.s[2] + v.s[3]*m3.s[2], + v.s[0]*m0.s[3] + v.s[1]*m1.s[3] + v.s[2]*m2.s[3] + v.s[3]*m3.s[3]); +} + +/** @brief Matrix multiplication and add + +Scheme: +@code +{A0 A1 A2 } |V0| |D0| +{B0 B1 B2 } |V1| |D1| +{C0 C1 C2 } x |V2| + |D2| +==================== +{R0 R1 R2 R3}, where: +R0 = A0V0 + A1V1 + A2V2 + D0, +R1 = B0V0 + B1V1 + B2V2 + D1 +... +@endcode +*/ +inline v_float32x4 v_matmuladd(const v_float32x4& v, const v_float32x4& m0, + const v_float32x4& m1, const v_float32x4& m2, + const v_float32x4& m3) +{ + return v_float32x4(v.s[0]*m0.s[0] + v.s[1]*m1.s[0] + v.s[2]*m2.s[0] + m3.s[0], + v.s[0]*m0.s[1] + v.s[1]*m1.s[1] + v.s[2]*m2.s[1] + m3.s[1], + v.s[0]*m0.s[2] + v.s[1]*m1.s[2] + v.s[2]*m2.s[2] + m3.s[2], + v.s[0]*m0.s[3] + v.s[1]*m1.s[3] + v.s[2]*m2.s[3] + m3.s[3]); +} + + +inline v_float64x2 v_dotprod_expand(const v_int32x4& a, const v_int32x4& b) +{ return v_fma(v_cvt_f64(a), v_cvt_f64(b), v_cvt_f64_high(a) * v_cvt_f64_high(b)); } +inline v_float64x2 v_dotprod_expand(const v_int32x4& a, const v_int32x4& b, const v_float64x2& c) +{ return v_fma(v_cvt_f64(a), v_cvt_f64(b), v_fma(v_cvt_f64_high(a), v_cvt_f64_high(b), c)); } + +inline v_float64x2 v_dotprod_expand_fast(const v_int32x4& a, const v_int32x4& b) +{ return v_dotprod_expand(a, b); } +inline v_float64x2 v_dotprod_expand_fast(const v_int32x4& a, const v_int32x4& b, const v_float64x2& c) +{ return v_dotprod_expand(a, b, c); } + +////// FP16 support /////// + +inline v_reg::nlanes128> +v_load_expand(const float16_t* ptr) +{ + v_reg::nlanes128> v; + for( int i = 0; i < v.nlanes; i++ ) + { + v.s[i] = ptr[i]; + } + return v; +} + +inline void +v_pack_store(float16_t* ptr, const v_reg::nlanes128>& v) +{ + for( int i = 0; i < v.nlanes; i++ ) + { + ptr[i] = float16_t(v.s[i]); + } +} + +inline void v_cleanup() {} + +//! @} + +#ifndef CV_DOXYGEN +CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END +#endif +} + +#endif diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/hal/intrin_forward.hpp b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/hal/intrin_forward.hpp new file mode 100644 index 0000000..979f15a --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/hal/intrin_forward.hpp @@ -0,0 +1,191 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html + +#ifndef CV__SIMD_FORWARD +#error "Need to pre-define forward width" +#endif + +namespace cv +{ + +//! @cond IGNORED + +CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN + +/** Types **/ +#if CV__SIMD_FORWARD == 1024 +// [todo] 1024 +#error "1024-long ops not implemented yet" +#elif CV__SIMD_FORWARD == 512 +// 512 +#define __CV_VX(fun) v512_##fun +#define __CV_V_UINT8 v_uint8x64 +#define __CV_V_INT8 v_int8x64 +#define __CV_V_UINT16 v_uint16x32 +#define __CV_V_INT16 v_int16x32 +#define __CV_V_UINT32 v_uint32x16 +#define __CV_V_INT32 v_int32x16 +#define __CV_V_UINT64 v_uint64x8 +#define __CV_V_INT64 v_int64x8 +#define __CV_V_FLOAT32 v_float32x16 +#define __CV_V_FLOAT64 v_float64x8 +struct v_uint8x64; +struct v_int8x64; +struct v_uint16x32; +struct v_int16x32; +struct v_uint32x16; +struct v_int32x16; +struct v_uint64x8; +struct v_int64x8; +struct v_float32x16; +struct v_float64x8; +#elif CV__SIMD_FORWARD == 256 +// 256 +#define __CV_VX(fun) v256_##fun +#define __CV_V_UINT8 v_uint8x32 +#define __CV_V_INT8 v_int8x32 +#define __CV_V_UINT16 v_uint16x16 +#define __CV_V_INT16 v_int16x16 +#define __CV_V_UINT32 v_uint32x8 +#define __CV_V_INT32 v_int32x8 +#define __CV_V_UINT64 v_uint64x4 +#define __CV_V_INT64 v_int64x4 +#define __CV_V_FLOAT32 v_float32x8 +#define __CV_V_FLOAT64 v_float64x4 +struct v_uint8x32; +struct v_int8x32; +struct v_uint16x16; +struct v_int16x16; +struct v_uint32x8; +struct v_int32x8; +struct v_uint64x4; +struct v_int64x4; +struct v_float32x8; +struct v_float64x4; +#else +// 128 +#define __CV_VX(fun) v_##fun +#define __CV_V_UINT8 v_uint8x16 +#define __CV_V_INT8 v_int8x16 +#define __CV_V_UINT16 v_uint16x8 +#define __CV_V_INT16 v_int16x8 +#define __CV_V_UINT32 v_uint32x4 +#define __CV_V_INT32 v_int32x4 +#define __CV_V_UINT64 v_uint64x2 +#define __CV_V_INT64 v_int64x2 +#define __CV_V_FLOAT32 v_float32x4 +#define __CV_V_FLOAT64 v_float64x2 +struct v_uint8x16; +struct v_int8x16; +struct v_uint16x8; +struct v_int16x8; +struct v_uint32x4; +struct v_int32x4; +struct v_uint64x2; +struct v_int64x2; +struct v_float32x4; +struct v_float64x2; +#endif + +/** Value reordering **/ + +// Expansion +void v_expand(const __CV_V_UINT8&, __CV_V_UINT16&, __CV_V_UINT16&); +void v_expand(const __CV_V_INT8&, __CV_V_INT16&, __CV_V_INT16&); +void v_expand(const __CV_V_UINT16&, __CV_V_UINT32&, __CV_V_UINT32&); +void v_expand(const __CV_V_INT16&, __CV_V_INT32&, __CV_V_INT32&); +void v_expand(const __CV_V_UINT32&, __CV_V_UINT64&, __CV_V_UINT64&); +void v_expand(const __CV_V_INT32&, __CV_V_INT64&, __CV_V_INT64&); +// Low Expansion +__CV_V_UINT16 v_expand_low(const __CV_V_UINT8&); +__CV_V_INT16 v_expand_low(const __CV_V_INT8&); +__CV_V_UINT32 v_expand_low(const __CV_V_UINT16&); +__CV_V_INT32 v_expand_low(const __CV_V_INT16&); +__CV_V_UINT64 v_expand_low(const __CV_V_UINT32&); +__CV_V_INT64 v_expand_low(const __CV_V_INT32&); +// High Expansion +__CV_V_UINT16 v_expand_high(const __CV_V_UINT8&); +__CV_V_INT16 v_expand_high(const __CV_V_INT8&); +__CV_V_UINT32 v_expand_high(const __CV_V_UINT16&); +__CV_V_INT32 v_expand_high(const __CV_V_INT16&); +__CV_V_UINT64 v_expand_high(const __CV_V_UINT32&); +__CV_V_INT64 v_expand_high(const __CV_V_INT32&); +// Load & Low Expansion +__CV_V_UINT16 __CV_VX(load_expand)(const uchar*); +__CV_V_INT16 __CV_VX(load_expand)(const schar*); +__CV_V_UINT32 __CV_VX(load_expand)(const ushort*); +__CV_V_INT32 __CV_VX(load_expand)(const short*); +__CV_V_UINT64 __CV_VX(load_expand)(const uint*); +__CV_V_INT64 __CV_VX(load_expand)(const int*); +// Load lower 8-bit and expand into 32-bit +__CV_V_UINT32 __CV_VX(load_expand_q)(const uchar*); +__CV_V_INT32 __CV_VX(load_expand_q)(const schar*); + +// Saturating Pack +__CV_V_UINT8 v_pack(const __CV_V_UINT16&, const __CV_V_UINT16&); +__CV_V_INT8 v_pack(const __CV_V_INT16&, const __CV_V_INT16&); +__CV_V_UINT16 v_pack(const __CV_V_UINT32&, const __CV_V_UINT32&); +__CV_V_INT16 v_pack(const __CV_V_INT32&, const __CV_V_INT32&); +// Non-saturating Pack +__CV_V_UINT32 v_pack(const __CV_V_UINT64&, const __CV_V_UINT64&); +__CV_V_INT32 v_pack(const __CV_V_INT64&, const __CV_V_INT64&); +// Pack signed integers with unsigned saturation +__CV_V_UINT8 v_pack_u(const __CV_V_INT16&, const __CV_V_INT16&); +__CV_V_UINT16 v_pack_u(const __CV_V_INT32&, const __CV_V_INT32&); + +/** Arithmetic, bitwise and comparison operations **/ + +// Non-saturating multiply +#if CV_VSX +template +Tvec v_mul_wrap(const Tvec& a, const Tvec& b); +#else +__CV_V_UINT8 v_mul_wrap(const __CV_V_UINT8&, const __CV_V_UINT8&); +__CV_V_INT8 v_mul_wrap(const __CV_V_INT8&, const __CV_V_INT8&); +__CV_V_UINT16 v_mul_wrap(const __CV_V_UINT16&, const __CV_V_UINT16&); +__CV_V_INT16 v_mul_wrap(const __CV_V_INT16&, const __CV_V_INT16&); +#endif + +// Multiply and expand +#if CV_VSX +template +void v_mul_expand(const Tvec& a, const Tvec& b, Twvec& c, Twvec& d); +#else +void v_mul_expand(const __CV_V_UINT8&, const __CV_V_UINT8&, __CV_V_UINT16&, __CV_V_UINT16&); +void v_mul_expand(const __CV_V_INT8&, const __CV_V_INT8&, __CV_V_INT16&, __CV_V_INT16&); +void v_mul_expand(const __CV_V_UINT16&, const __CV_V_UINT16&, __CV_V_UINT32&, __CV_V_UINT32&); +void v_mul_expand(const __CV_V_INT16&, const __CV_V_INT16&, __CV_V_INT32&, __CV_V_INT32&); +void v_mul_expand(const __CV_V_UINT32&, const __CV_V_UINT32&, __CV_V_UINT64&, __CV_V_UINT64&); +void v_mul_expand(const __CV_V_INT32&, const __CV_V_INT32&, __CV_V_INT64&, __CV_V_INT64&); +#endif + +// Conversions +__CV_V_FLOAT32 v_cvt_f32(const __CV_V_INT32& a); +__CV_V_FLOAT32 v_cvt_f32(const __CV_V_FLOAT64& a); +__CV_V_FLOAT32 v_cvt_f32(const __CV_V_FLOAT64& a, const __CV_V_FLOAT64& b); +__CV_V_FLOAT64 v_cvt_f64(const __CV_V_INT32& a); +__CV_V_FLOAT64 v_cvt_f64_high(const __CV_V_INT32& a); +__CV_V_FLOAT64 v_cvt_f64(const __CV_V_FLOAT32& a); +__CV_V_FLOAT64 v_cvt_f64_high(const __CV_V_FLOAT32& a); +__CV_V_FLOAT64 v_cvt_f64(const __CV_V_INT64& a); + +/** Cleanup **/ +#undef CV__SIMD_FORWARD +#undef __CV_VX +#undef __CV_V_UINT8 +#undef __CV_V_INT8 +#undef __CV_V_UINT16 +#undef __CV_V_INT16 +#undef __CV_V_UINT32 +#undef __CV_V_INT32 +#undef __CV_V_UINT64 +#undef __CV_V_INT64 +#undef __CV_V_FLOAT32 +#undef __CV_V_FLOAT64 + +CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END + +//! @endcond + +} // cv:: \ No newline at end of file diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/hal/intrin_msa.hpp b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/hal/intrin_msa.hpp new file mode 100644 index 0000000..a1fbb09 --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/hal/intrin_msa.hpp @@ -0,0 +1,1887 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#ifndef OPENCV_HAL_INTRIN_MSA_HPP +#define OPENCV_HAL_INTRIN_MSA_HPP + +#include +#include "opencv2/core/utility.hpp" + +namespace cv +{ + +//! @cond IGNORED +CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN + +#define CV_SIMD128 1 + +//MSA implements 128-bit wide vector registers shared with the 64-bit wide floating-point unit registers. +//MSA and FPU can not be both present, unless the FPU has 64-bit floating-point registers. +#define CV_SIMD128_64F 1 + +struct v_uint8x16 +{ + typedef uchar lane_type; + enum { nlanes = 16 }; + + v_uint8x16() {} + explicit v_uint8x16(v16u8 v) : val(v) {} + v_uint8x16(uchar v0, uchar v1, uchar v2, uchar v3, uchar v4, uchar v5, uchar v6, uchar v7, + uchar v8, uchar v9, uchar v10, uchar v11, uchar v12, uchar v13, uchar v14, uchar v15) + { + uchar v[] = {v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15}; + val = msa_ld1q_u8(v); + } + + uchar get0() const + { + return msa_getq_lane_u8(val, 0); + } + + v16u8 val; +}; + +struct v_int8x16 +{ + typedef schar lane_type; + enum { nlanes = 16 }; + + v_int8x16() {} + explicit v_int8x16(v16i8 v) : val(v) {} + v_int8x16(schar v0, schar v1, schar v2, schar v3, schar v4, schar v5, schar v6, schar v7, + schar v8, schar v9, schar v10, schar v11, schar v12, schar v13, schar v14, schar v15) + { + schar v[] = {v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15}; + val = msa_ld1q_s8(v); + } + + schar get0() const + { + return msa_getq_lane_s8(val, 0); + } + + v16i8 val; +}; + +struct v_uint16x8 +{ + typedef ushort lane_type; + enum { nlanes = 8 }; + + v_uint16x8() {} + explicit v_uint16x8(v8u16 v) : val(v) {} + v_uint16x8(ushort v0, ushort v1, ushort v2, ushort v3, ushort v4, ushort v5, ushort v6, ushort v7) + { + ushort v[] = {v0, v1, v2, v3, v4, v5, v6, v7}; + val = msa_ld1q_u16(v); + } + + ushort get0() const + { + return msa_getq_lane_u16(val, 0); + } + + v8u16 val; +}; + +struct v_int16x8 +{ + typedef short lane_type; + enum { nlanes = 8 }; + + v_int16x8() {} + explicit v_int16x8(v8i16 v) : val(v) {} + v_int16x8(short v0, short v1, short v2, short v3, short v4, short v5, short v6, short v7) + { + short v[] = {v0, v1, v2, v3, v4, v5, v6, v7}; + val = msa_ld1q_s16(v); + } + + short get0() const + { + return msa_getq_lane_s16(val, 0); + } + + v8i16 val; +}; + +struct v_uint32x4 +{ + typedef unsigned int lane_type; + enum { nlanes = 4 }; + + v_uint32x4() {} + explicit v_uint32x4(v4u32 v) : val(v) {} + v_uint32x4(unsigned int v0, unsigned int v1, unsigned int v2, unsigned int v3) + { + unsigned int v[] = {v0, v1, v2, v3}; + val = msa_ld1q_u32(v); + } + + unsigned int get0() const + { + return msa_getq_lane_u32(val, 0); + } + + v4u32 val; +}; + +struct v_int32x4 +{ + typedef int lane_type; + enum { nlanes = 4 }; + + v_int32x4() {} + explicit v_int32x4(v4i32 v) : val(v) {} + v_int32x4(int v0, int v1, int v2, int v3) + { + int v[] = {v0, v1, v2, v3}; + val = msa_ld1q_s32(v); + } + + int get0() const + { + return msa_getq_lane_s32(val, 0); + } + + v4i32 val; +}; + +struct v_float32x4 +{ + typedef float lane_type; + enum { nlanes = 4 }; + + v_float32x4() {} + explicit v_float32x4(v4f32 v) : val(v) {} + v_float32x4(float v0, float v1, float v2, float v3) + { + float v[] = {v0, v1, v2, v3}; + val = msa_ld1q_f32(v); + } + + float get0() const + { + return msa_getq_lane_f32(val, 0); + } + + v4f32 val; +}; + +struct v_uint64x2 +{ + typedef uint64 lane_type; + enum { nlanes = 2 }; + + v_uint64x2() {} + explicit v_uint64x2(v2u64 v) : val(v) {} + v_uint64x2(uint64 v0, uint64 v1) + { + uint64 v[] = {v0, v1}; + val = msa_ld1q_u64(v); + } + + uint64 get0() const + { + return msa_getq_lane_u64(val, 0); + } + + v2u64 val; +}; + +struct v_int64x2 +{ + typedef int64 lane_type; + enum { nlanes = 2 }; + + v_int64x2() {} + explicit v_int64x2(v2i64 v) : val(v) {} + v_int64x2(int64 v0, int64 v1) + { + int64 v[] = {v0, v1}; + val = msa_ld1q_s64(v); + } + + int64 get0() const + { + return msa_getq_lane_s64(val, 0); + } + + v2i64 val; +}; + +struct v_float64x2 +{ + typedef double lane_type; + enum { nlanes = 2 }; + + v_float64x2() {} + explicit v_float64x2(v2f64 v) : val(v) {} + v_float64x2(double v0, double v1) + { + double v[] = {v0, v1}; + val = msa_ld1q_f64(v); + } + + double get0() const + { + return msa_getq_lane_f64(val, 0); + } + + v2f64 val; +}; + +#define OPENCV_HAL_IMPL_MSA_INIT(_Tpv, _Tp, suffix) \ +inline v_##_Tpv v_setzero_##suffix() { return v_##_Tpv(msa_dupq_n_##suffix((_Tp)0)); } \ +inline v_##_Tpv v_setall_##suffix(_Tp v) { return v_##_Tpv(msa_dupq_n_##suffix(v)); } \ +inline v_uint8x16 v_reinterpret_as_u8(const v_##_Tpv& v) { return v_uint8x16(MSA_TPV_REINTERPRET(v16u8, v.val)); } \ +inline v_int8x16 v_reinterpret_as_s8(const v_##_Tpv& v) { return v_int8x16(MSA_TPV_REINTERPRET(v16i8, v.val)); } \ +inline v_uint16x8 v_reinterpret_as_u16(const v_##_Tpv& v) { return v_uint16x8(MSA_TPV_REINTERPRET(v8u16, v.val)); } \ +inline v_int16x8 v_reinterpret_as_s16(const v_##_Tpv& v) { return v_int16x8(MSA_TPV_REINTERPRET(v8i16, v.val)); } \ +inline v_uint32x4 v_reinterpret_as_u32(const v_##_Tpv& v) { return v_uint32x4(MSA_TPV_REINTERPRET(v4u32, v.val)); } \ +inline v_int32x4 v_reinterpret_as_s32(const v_##_Tpv& v) { return v_int32x4(MSA_TPV_REINTERPRET(v4i32, v.val)); } \ +inline v_uint64x2 v_reinterpret_as_u64(const v_##_Tpv& v) { return v_uint64x2(MSA_TPV_REINTERPRET(v2u64, v.val)); } \ +inline v_int64x2 v_reinterpret_as_s64(const v_##_Tpv& v) { return v_int64x2(MSA_TPV_REINTERPRET(v2i64, v.val)); } \ +inline v_float32x4 v_reinterpret_as_f32(const v_##_Tpv& v) { return v_float32x4(MSA_TPV_REINTERPRET(v4f32, v.val)); } \ +inline v_float64x2 v_reinterpret_as_f64(const v_##_Tpv& v) { return v_float64x2(MSA_TPV_REINTERPRET(v2f64, v.val)); } + +OPENCV_HAL_IMPL_MSA_INIT(uint8x16, uchar, u8) +OPENCV_HAL_IMPL_MSA_INIT(int8x16, schar, s8) +OPENCV_HAL_IMPL_MSA_INIT(uint16x8, ushort, u16) +OPENCV_HAL_IMPL_MSA_INIT(int16x8, short, s16) +OPENCV_HAL_IMPL_MSA_INIT(uint32x4, unsigned int, u32) +OPENCV_HAL_IMPL_MSA_INIT(int32x4, int, s32) +OPENCV_HAL_IMPL_MSA_INIT(uint64x2, uint64, u64) +OPENCV_HAL_IMPL_MSA_INIT(int64x2, int64, s64) +OPENCV_HAL_IMPL_MSA_INIT(float32x4, float, f32) +OPENCV_HAL_IMPL_MSA_INIT(float64x2, double, f64) + +#define OPENCV_HAL_IMPL_MSA_PACK(_Tpvec, _Tpwvec, pack, mov, rshr) \ +inline _Tpvec v_##pack(const _Tpwvec& a, const _Tpwvec& b) \ +{ \ + return _Tpvec(mov(a.val, b.val)); \ +} \ +template inline \ +_Tpvec v_rshr_##pack(const _Tpwvec& a, const _Tpwvec& b) \ +{ \ + return _Tpvec(rshr(a.val, b.val, n)); \ +} + +OPENCV_HAL_IMPL_MSA_PACK(v_uint8x16, v_uint16x8, pack, msa_qpack_u16, msa_qrpackr_u16) +OPENCV_HAL_IMPL_MSA_PACK(v_int8x16, v_int16x8, pack, msa_qpack_s16, msa_qrpackr_s16) +OPENCV_HAL_IMPL_MSA_PACK(v_uint16x8, v_uint32x4, pack, msa_qpack_u32, msa_qrpackr_u32) +OPENCV_HAL_IMPL_MSA_PACK(v_int16x8, v_int32x4, pack, msa_qpack_s32, msa_qrpackr_s32) +OPENCV_HAL_IMPL_MSA_PACK(v_uint32x4, v_uint64x2, pack, msa_pack_u64, msa_rpackr_u64) +OPENCV_HAL_IMPL_MSA_PACK(v_int32x4, v_int64x2, pack, msa_pack_s64, msa_rpackr_s64) +OPENCV_HAL_IMPL_MSA_PACK(v_uint8x16, v_int16x8, pack_u, msa_qpacku_s16, msa_qrpackru_s16) +OPENCV_HAL_IMPL_MSA_PACK(v_uint16x8, v_int32x4, pack_u, msa_qpacku_s32, msa_qrpackru_s32) + +#define OPENCV_HAL_IMPL_MSA_PACK_STORE(_Tpvec, _Tp, hreg, suffix, _Tpwvec, pack, mov, rshr) \ +inline void v_##pack##_store(_Tp* ptr, const _Tpwvec& a) \ +{ \ + hreg a1 = mov(a.val); \ + msa_st1_##suffix(ptr, a1); \ +} \ +template inline \ +void v_rshr_##pack##_store(_Tp* ptr, const _Tpwvec& a) \ +{ \ + hreg a1 = rshr(a.val, n); \ + msa_st1_##suffix(ptr, a1); \ +} + +OPENCV_HAL_IMPL_MSA_PACK_STORE(v_uint8x16, uchar, v8u8, u8, v_uint16x8, pack, msa_qmovn_u16, msa_qrshrn_n_u16) +OPENCV_HAL_IMPL_MSA_PACK_STORE(v_int8x16, schar, v8i8, s8, v_int16x8, pack, msa_qmovn_s16, msa_qrshrn_n_s16) +OPENCV_HAL_IMPL_MSA_PACK_STORE(v_uint16x8, ushort, v4u16, u16, v_uint32x4, pack, msa_qmovn_u32, msa_qrshrn_n_u32) +OPENCV_HAL_IMPL_MSA_PACK_STORE(v_int16x8, short, v4i16, s16, v_int32x4, pack, msa_qmovn_s32, msa_qrshrn_n_s32) +OPENCV_HAL_IMPL_MSA_PACK_STORE(v_uint32x4, unsigned, v2u32, u32, v_uint64x2, pack, msa_movn_u64, msa_rshrn_n_u64) +OPENCV_HAL_IMPL_MSA_PACK_STORE(v_int32x4, int, v2i32, s32, v_int64x2, pack, msa_movn_s64, msa_rshrn_n_s64) +OPENCV_HAL_IMPL_MSA_PACK_STORE(v_uint8x16, uchar, v8u8, u8, v_int16x8, pack_u, msa_qmovun_s16, msa_qrshrun_n_s16) +OPENCV_HAL_IMPL_MSA_PACK_STORE(v_uint16x8, ushort, v4u16, u16, v_int32x4, pack_u, msa_qmovun_s32, msa_qrshrun_n_s32) + +// pack boolean +inline v_uint8x16 v_pack_b(const v_uint16x8& a, const v_uint16x8& b) +{ + return v_uint8x16(msa_pack_u16(a.val, b.val)); +} + +inline v_uint8x16 v_pack_b(const v_uint32x4& a, const v_uint32x4& b, + const v_uint32x4& c, const v_uint32x4& d) +{ + return v_uint8x16(msa_pack_u16(msa_pack_u32(a.val, b.val), msa_pack_u32(c.val, d.val))); +} + +inline v_uint8x16 v_pack_b(const v_uint64x2& a, const v_uint64x2& b, const v_uint64x2& c, + const v_uint64x2& d, const v_uint64x2& e, const v_uint64x2& f, + const v_uint64x2& g, const v_uint64x2& h) +{ + v8u16 abcd = msa_pack_u32(msa_pack_u64(a.val, b.val), msa_pack_u64(c.val, d.val)); + v8u16 efgh = msa_pack_u32(msa_pack_u64(e.val, f.val), msa_pack_u64(g.val, h.val)); + return v_uint8x16(msa_pack_u16(abcd, efgh)); +} + +inline v_float32x4 v_matmul(const v_float32x4& v, const v_float32x4& m0, + const v_float32x4& m1, const v_float32x4& m2, + const v_float32x4& m3) +{ + v4f32 v0 = v.val; + v4f32 res = msa_mulq_lane_f32(m0.val, v0, 0); + res = msa_mlaq_lane_f32(res, m1.val, v0, 1); + res = msa_mlaq_lane_f32(res, m2.val, v0, 2); + res = msa_mlaq_lane_f32(res, m3.val, v0, 3); + return v_float32x4(res); +} + +inline v_float32x4 v_matmuladd(const v_float32x4& v, const v_float32x4& m0, + const v_float32x4& m1, const v_float32x4& m2, + const v_float32x4& a) +{ + v4f32 v0 = v.val; + v4f32 res = msa_mulq_lane_f32(m0.val, v0, 0); + res = msa_mlaq_lane_f32(res, m1.val, v0, 1); + res = msa_mlaq_lane_f32(res, m2.val, v0, 2); + res = msa_addq_f32(res, a.val); + return v_float32x4(res); +} + +#define OPENCV_HAL_IMPL_MSA_BIN_OP(bin_op, _Tpvec, intrin) \ +inline _Tpvec operator bin_op (const _Tpvec& a, const _Tpvec& b) \ +{ \ + return _Tpvec(intrin(a.val, b.val)); \ +} \ +inline _Tpvec& operator bin_op##= (_Tpvec& a, const _Tpvec& b) \ +{ \ + a.val = intrin(a.val, b.val); \ + return a; \ +} + +OPENCV_HAL_IMPL_MSA_BIN_OP(+, v_uint8x16, msa_qaddq_u8) +OPENCV_HAL_IMPL_MSA_BIN_OP(-, v_uint8x16, msa_qsubq_u8) +OPENCV_HAL_IMPL_MSA_BIN_OP(+, v_int8x16, msa_qaddq_s8) +OPENCV_HAL_IMPL_MSA_BIN_OP(-, v_int8x16, msa_qsubq_s8) +OPENCV_HAL_IMPL_MSA_BIN_OP(+, v_uint16x8, msa_qaddq_u16) +OPENCV_HAL_IMPL_MSA_BIN_OP(-, v_uint16x8, msa_qsubq_u16) +OPENCV_HAL_IMPL_MSA_BIN_OP(+, v_int16x8, msa_qaddq_s16) +OPENCV_HAL_IMPL_MSA_BIN_OP(-, v_int16x8, msa_qsubq_s16) +OPENCV_HAL_IMPL_MSA_BIN_OP(+, v_int32x4, msa_addq_s32) +OPENCV_HAL_IMPL_MSA_BIN_OP(-, v_int32x4, msa_subq_s32) +OPENCV_HAL_IMPL_MSA_BIN_OP(*, v_int32x4, msa_mulq_s32) +OPENCV_HAL_IMPL_MSA_BIN_OP(+, v_uint32x4, msa_addq_u32) +OPENCV_HAL_IMPL_MSA_BIN_OP(-, v_uint32x4, msa_subq_u32) +OPENCV_HAL_IMPL_MSA_BIN_OP(*, v_uint32x4, msa_mulq_u32) +OPENCV_HAL_IMPL_MSA_BIN_OP(+, v_float32x4, msa_addq_f32) +OPENCV_HAL_IMPL_MSA_BIN_OP(-, v_float32x4, msa_subq_f32) +OPENCV_HAL_IMPL_MSA_BIN_OP(*, v_float32x4, msa_mulq_f32) +OPENCV_HAL_IMPL_MSA_BIN_OP(+, v_int64x2, msa_addq_s64) +OPENCV_HAL_IMPL_MSA_BIN_OP(-, v_int64x2, msa_subq_s64) +OPENCV_HAL_IMPL_MSA_BIN_OP(+, v_uint64x2, msa_addq_u64) +OPENCV_HAL_IMPL_MSA_BIN_OP(-, v_uint64x2, msa_subq_u64) +OPENCV_HAL_IMPL_MSA_BIN_OP(/, v_float32x4, msa_divq_f32) +OPENCV_HAL_IMPL_MSA_BIN_OP(+, v_float64x2, msa_addq_f64) +OPENCV_HAL_IMPL_MSA_BIN_OP(-, v_float64x2, msa_subq_f64) +OPENCV_HAL_IMPL_MSA_BIN_OP(*, v_float64x2, msa_mulq_f64) +OPENCV_HAL_IMPL_MSA_BIN_OP(/, v_float64x2, msa_divq_f64) + +// saturating multiply 8-bit, 16-bit +#define OPENCV_HAL_IMPL_MSA_MUL_SAT(_Tpvec, _Tpwvec) \ +inline _Tpvec operator * (const _Tpvec& a, const _Tpvec& b) \ +{ \ + _Tpwvec c, d; \ + v_mul_expand(a, b, c, d); \ + return v_pack(c, d); \ +} \ +inline _Tpvec& operator *= (_Tpvec& a, const _Tpvec& b) \ +{a = a * b; return a; } + +OPENCV_HAL_IMPL_MSA_MUL_SAT(v_int8x16, v_int16x8) +OPENCV_HAL_IMPL_MSA_MUL_SAT(v_uint8x16, v_uint16x8) +OPENCV_HAL_IMPL_MSA_MUL_SAT(v_int16x8, v_int32x4) +OPENCV_HAL_IMPL_MSA_MUL_SAT(v_uint16x8, v_uint32x4) + +// Multiply and expand +inline void v_mul_expand(const v_int8x16& a, const v_int8x16& b, + v_int16x8& c, v_int16x8& d) +{ + v16i8 a_lo, a_hi, b_lo, b_hi; + + ILVRL_B2_SB(a.val, msa_dupq_n_s8(0), a_lo, a_hi); + ILVRL_B2_SB(b.val, msa_dupq_n_s8(0), b_lo, b_hi); + c.val = msa_mulq_s16(msa_paddlq_s8(a_lo), msa_paddlq_s8(b_lo)); + d.val = msa_mulq_s16(msa_paddlq_s8(a_hi), msa_paddlq_s8(b_hi)); +} + +inline void v_mul_expand(const v_uint8x16& a, const v_uint8x16& b, + v_uint16x8& c, v_uint16x8& d) +{ + v16u8 a_lo, a_hi, b_lo, b_hi; + + ILVRL_B2_UB(a.val, msa_dupq_n_u8(0), a_lo, a_hi); + ILVRL_B2_UB(b.val, msa_dupq_n_u8(0), b_lo, b_hi); + c.val = msa_mulq_u16(msa_paddlq_u8(a_lo), msa_paddlq_u8(b_lo)); + d.val = msa_mulq_u16(msa_paddlq_u8(a_hi), msa_paddlq_u8(b_hi)); +} + +inline void v_mul_expand(const v_int16x8& a, const v_int16x8& b, + v_int32x4& c, v_int32x4& d) +{ + v8i16 a_lo, a_hi, b_lo, b_hi; + + ILVRL_H2_SH(a.val, msa_dupq_n_s16(0), a_lo, a_hi); + ILVRL_H2_SH(b.val, msa_dupq_n_s16(0), b_lo, b_hi); + c.val = msa_mulq_s32(msa_paddlq_s16(a_lo), msa_paddlq_s16(b_lo)); + d.val = msa_mulq_s32(msa_paddlq_s16(a_hi), msa_paddlq_s16(b_hi)); +} + +inline void v_mul_expand(const v_uint16x8& a, const v_uint16x8& b, + v_uint32x4& c, v_uint32x4& d) +{ + v8u16 a_lo, a_hi, b_lo, b_hi; + + ILVRL_H2_UH(a.val, msa_dupq_n_u16(0), a_lo, a_hi); + ILVRL_H2_UH(b.val, msa_dupq_n_u16(0), b_lo, b_hi); + c.val = msa_mulq_u32(msa_paddlq_u16(a_lo), msa_paddlq_u16(b_lo)); + d.val = msa_mulq_u32(msa_paddlq_u16(a_hi), msa_paddlq_u16(b_hi)); +} + +inline void v_mul_expand(const v_uint32x4& a, const v_uint32x4& b, + v_uint64x2& c, v_uint64x2& d) +{ + v4u32 a_lo, a_hi, b_lo, b_hi; + + ILVRL_W2_UW(a.val, msa_dupq_n_u32(0), a_lo, a_hi); + ILVRL_W2_UW(b.val, msa_dupq_n_u32(0), b_lo, b_hi); + c.val = msa_mulq_u64(msa_paddlq_u32(a_lo), msa_paddlq_u32(b_lo)); + d.val = msa_mulq_u64(msa_paddlq_u32(a_hi), msa_paddlq_u32(b_hi)); +} + +inline v_int16x8 v_mul_hi(const v_int16x8& a, const v_int16x8& b) +{ + v8i16 a_lo, a_hi, b_lo, b_hi; + + ILVRL_H2_SH(a.val, msa_dupq_n_s16(0), a_lo, a_hi); + ILVRL_H2_SH(b.val, msa_dupq_n_s16(0), b_lo, b_hi); + + return v_int16x8(msa_packr_s32(msa_mulq_s32(msa_paddlq_s16(a_lo), msa_paddlq_s16(b_lo)), + msa_mulq_s32(msa_paddlq_s16(a_hi), msa_paddlq_s16(b_hi)), 16)); +} + +inline v_uint16x8 v_mul_hi(const v_uint16x8& a, const v_uint16x8& b) +{ + v8u16 a_lo, a_hi, b_lo, b_hi; + + ILVRL_H2_UH(a.val, msa_dupq_n_u16(0), a_lo, a_hi); + ILVRL_H2_UH(b.val, msa_dupq_n_u16(0), b_lo, b_hi); + + return v_uint16x8(msa_packr_u32(msa_mulq_u32(msa_paddlq_u16(a_lo), msa_paddlq_u16(b_lo)), + msa_mulq_u32(msa_paddlq_u16(a_hi), msa_paddlq_u16(b_hi)), 16)); +} + +//////// Dot Product //////// + +// 16 >> 32 +inline v_int32x4 v_dotprod(const v_int16x8& a, const v_int16x8& b) +{ return v_int32x4(msa_dotp_s_w(a.val, b.val)); } +inline v_int32x4 v_dotprod(const v_int16x8& a, const v_int16x8& b, const v_int32x4& c) +{ return v_int32x4(msa_dpadd_s_w(c.val , a.val, b.val)); } + +// 32 >> 64 +inline v_int64x2 v_dotprod(const v_int32x4& a, const v_int32x4& b) +{ return v_int64x2(msa_dotp_s_d(a.val, b.val)); } +inline v_int64x2 v_dotprod(const v_int32x4& a, const v_int32x4& b, const v_int64x2& c) +{ return v_int64x2(msa_dpadd_s_d(c.val , a.val, b.val)); } + +// 8 >> 32 +inline v_uint32x4 v_dotprod_expand(const v_uint8x16& a, const v_uint8x16& b) +{ + v8u16 even_a = msa_shrq_n_u16(msa_shlq_n_u16(MSA_TPV_REINTERPRET(v8u16, a.val), 8), 8); + v8u16 odd_a = msa_shrq_n_u16(MSA_TPV_REINTERPRET(v8u16, a.val), 8); + v8u16 even_b = msa_shrq_n_u16(msa_shlq_n_u16(MSA_TPV_REINTERPRET(v8u16, b.val), 8), 8); + v8u16 odd_b = msa_shrq_n_u16(MSA_TPV_REINTERPRET(v8u16, b.val), 8); + v4u32 prod = msa_dotp_u_w(even_a, even_b); + return v_uint32x4(msa_dpadd_u_w(prod, odd_a, odd_b)); +} +inline v_uint32x4 v_dotprod_expand(const v_uint8x16& a, const v_uint8x16& b, const v_uint32x4& c) +{ + v8u16 even_a = msa_shrq_n_u16(msa_shlq_n_u16(MSA_TPV_REINTERPRET(v8u16, a.val), 8), 8); + v8u16 odd_a = msa_shrq_n_u16(MSA_TPV_REINTERPRET(v8u16, a.val), 8); + v8u16 even_b = msa_shrq_n_u16(msa_shlq_n_u16(MSA_TPV_REINTERPRET(v8u16, b.val), 8), 8); + v8u16 odd_b = msa_shrq_n_u16(MSA_TPV_REINTERPRET(v8u16, b.val), 8); + v4u32 prod = msa_dpadd_u_w(c.val, even_a, even_b); + return v_uint32x4(msa_dpadd_u_w(prod, odd_a, odd_b)); +} + +inline v_int32x4 v_dotprod_expand(const v_int8x16& a, const v_int8x16& b) +{ + v8i16 prod = msa_dotp_s_h(a.val, b.val); + return v_int32x4(msa_hadd_s32(prod, prod)); +} +inline v_int32x4 v_dotprod_expand(const v_int8x16& a, const v_int8x16& b, + const v_int32x4& c) +{ return v_dotprod_expand(a, b) + c; } + +// 16 >> 64 +inline v_uint64x2 v_dotprod_expand(const v_uint16x8& a, const v_uint16x8& b) +{ + v4u32 even_a = msa_shrq_n_u32(msa_shlq_n_u32(MSA_TPV_REINTERPRET(v4u32, a.val), 16), 16); + v4u32 odd_a = msa_shrq_n_u32(MSA_TPV_REINTERPRET(v4u32, a.val), 16); + v4u32 even_b = msa_shrq_n_u32(msa_shlq_n_u32(MSA_TPV_REINTERPRET(v4u32, b.val), 16), 16); + v4u32 odd_b = msa_shrq_n_u32(MSA_TPV_REINTERPRET(v4u32, b.val), 16); + v2u64 prod = msa_dotp_u_d(even_a, even_b); + return v_uint64x2(msa_dpadd_u_d(prod, odd_a, odd_b)); +} +inline v_uint64x2 v_dotprod_expand(const v_uint16x8& a, const v_uint16x8& b, + const v_uint64x2& c) +{ + v4u32 even_a = msa_shrq_n_u32(msa_shlq_n_u32(MSA_TPV_REINTERPRET(v4u32, a.val), 16), 16); + v4u32 odd_a = msa_shrq_n_u32(MSA_TPV_REINTERPRET(v4u32, a.val), 16); + v4u32 even_b = msa_shrq_n_u32(msa_shlq_n_u32(MSA_TPV_REINTERPRET(v4u32, b.val), 16), 16); + v4u32 odd_b = msa_shrq_n_u32(MSA_TPV_REINTERPRET(v4u32, b.val), 16); + v2u64 prod = msa_dpadd_u_d(c.val, even_a, even_b); + return v_uint64x2(msa_dpadd_u_d(prod, odd_a, odd_b)); +} + +inline v_int64x2 v_dotprod_expand(const v_int16x8& a, const v_int16x8& b) +{ + v4i32 prod = msa_dotp_s_w(a.val, b.val); + return v_int64x2(msa_hadd_s64(prod, prod)); +} +inline v_int64x2 v_dotprod_expand(const v_int16x8& a, const v_int16x8& b, const v_int64x2& c) +{ return v_dotprod_expand(a, b) + c; } + +// 32 >> 64f +inline v_float64x2 v_dotprod_expand(const v_int32x4& a, const v_int32x4& b) +{ return v_cvt_f64(v_dotprod(a, b)); } +inline v_float64x2 v_dotprod_expand(const v_int32x4& a, const v_int32x4& b, const v_float64x2& c) +{ return v_dotprod_expand(a, b) + c; } + + +//////// Fast Dot Product //////// + +// 16 >> 32 +inline v_int32x4 v_dotprod_fast(const v_int16x8& a, const v_int16x8& b) +{ return v_dotprod(a, b); } +inline v_int32x4 v_dotprod_fast(const v_int16x8& a, const v_int16x8& b, const v_int32x4& c) +{ return v_dotprod(a, b, c); } + +// 32 >> 64 +inline v_int64x2 v_dotprod_fast(const v_int32x4& a, const v_int32x4& b) +{ return v_dotprod(a, b); } +inline v_int64x2 v_dotprod_fast(const v_int32x4& a, const v_int32x4& b, const v_int64x2& c) +{ return v_dotprod(a, b, c); } + +// 8 >> 32 +inline v_uint32x4 v_dotprod_expand_fast(const v_uint8x16& a, const v_uint8x16& b) +{ return v_dotprod_expand(a, b); } +inline v_uint32x4 v_dotprod_expand_fast(const v_uint8x16& a, const v_uint8x16& b, const v_uint32x4& c) +{ return v_dotprod_expand(a, b, c); } +inline v_int32x4 v_dotprod_expand_fast(const v_int8x16& a, const v_int8x16& b) +{ return v_dotprod_expand(a, b); } +inline v_int32x4 v_dotprod_expand_fast(const v_int8x16& a, const v_int8x16& b, const v_int32x4& c) +{ return v_dotprod_expand(a, b, c); } + +// 16 >> 64 +inline v_uint64x2 v_dotprod_expand_fast(const v_uint16x8& a, const v_uint16x8& b) +{ return v_dotprod_expand(a, b); } +inline v_uint64x2 v_dotprod_expand_fast(const v_uint16x8& a, const v_uint16x8& b, const v_uint64x2& c) +{ return v_dotprod_expand(a, b, c); } +inline v_int64x2 v_dotprod_expand_fast(const v_int16x8& a, const v_int16x8& b) +{ return v_dotprod_expand(a, b); } +inline v_int64x2 v_dotprod_expand_fast(const v_int16x8& a, const v_int16x8& b, const v_int64x2& c) +{ return v_dotprod_expand(a, b, c); } + +// 32 >> 64f +inline v_float64x2 v_dotprod_expand_fast(const v_int32x4& a, const v_int32x4& b) +{ return v_dotprod_expand(a, b); } +inline v_float64x2 v_dotprod_expand_fast(const v_int32x4& a, const v_int32x4& b, const v_float64x2& c) +{ return v_dotprod_expand(a, b, c); } + +#define OPENCV_HAL_IMPL_MSA_LOGIC_OP(_Tpvec, _Tpv, suffix) \ +OPENCV_HAL_IMPL_MSA_BIN_OP(&, _Tpvec, msa_andq_##suffix) \ +OPENCV_HAL_IMPL_MSA_BIN_OP(|, _Tpvec, msa_orrq_##suffix) \ +OPENCV_HAL_IMPL_MSA_BIN_OP(^, _Tpvec, msa_eorq_##suffix) \ +inline _Tpvec operator ~ (const _Tpvec& a) \ +{ \ + return _Tpvec(MSA_TPV_REINTERPRET(_Tpv, msa_mvnq_u8(MSA_TPV_REINTERPRET(v16u8, a.val)))); \ +} + +OPENCV_HAL_IMPL_MSA_LOGIC_OP(v_uint8x16, v16u8, u8) +OPENCV_HAL_IMPL_MSA_LOGIC_OP(v_int8x16, v16i8, s8) +OPENCV_HAL_IMPL_MSA_LOGIC_OP(v_uint16x8, v8u16, u16) +OPENCV_HAL_IMPL_MSA_LOGIC_OP(v_int16x8, v8i16, s16) +OPENCV_HAL_IMPL_MSA_LOGIC_OP(v_uint32x4, v4u32, u32) +OPENCV_HAL_IMPL_MSA_LOGIC_OP(v_int32x4, v4i32, s32) +OPENCV_HAL_IMPL_MSA_LOGIC_OP(v_uint64x2, v2u64, u64) +OPENCV_HAL_IMPL_MSA_LOGIC_OP(v_int64x2, v2i64, s64) + +#define OPENCV_HAL_IMPL_MSA_FLT_BIT_OP(bin_op, intrin) \ +inline v_float32x4 operator bin_op (const v_float32x4& a, const v_float32x4& b) \ +{ \ + return v_float32x4(MSA_TPV_REINTERPRET(v4f32, intrin(MSA_TPV_REINTERPRET(v4i32, a.val), MSA_TPV_REINTERPRET(v4i32, b.val)))); \ +} \ +inline v_float32x4& operator bin_op##= (v_float32x4& a, const v_float32x4& b) \ +{ \ + a.val = MSA_TPV_REINTERPRET(v4f32, intrin(MSA_TPV_REINTERPRET(v4i32, a.val), MSA_TPV_REINTERPRET(v4i32, b.val))); \ + return a; \ +} + +OPENCV_HAL_IMPL_MSA_FLT_BIT_OP(&, msa_andq_s32) +OPENCV_HAL_IMPL_MSA_FLT_BIT_OP(|, msa_orrq_s32) +OPENCV_HAL_IMPL_MSA_FLT_BIT_OP(^, msa_eorq_s32) + +inline v_float32x4 operator ~ (const v_float32x4& a) +{ + return v_float32x4(MSA_TPV_REINTERPRET(v4f32, msa_mvnq_s32(MSA_TPV_REINTERPRET(v4i32, a.val)))); +} + +/* v_abs */ +#define OPENCV_HAL_IMPL_MSA_ABS(_Tpuvec, _Tpsvec, usuffix, ssuffix) \ +inline _Tpuvec v_abs(const _Tpsvec& a) \ +{ \ + return v_reinterpret_as_##usuffix(_Tpsvec(msa_absq_##ssuffix(a.val))); \ +} + +OPENCV_HAL_IMPL_MSA_ABS(v_uint8x16, v_int8x16, u8, s8) +OPENCV_HAL_IMPL_MSA_ABS(v_uint16x8, v_int16x8, u16, s16) +OPENCV_HAL_IMPL_MSA_ABS(v_uint32x4, v_int32x4, u32, s32) + +/* v_abs(float), v_sqrt, v_invsqrt */ +#define OPENCV_HAL_IMPL_MSA_BASIC_FUNC(_Tpvec, func, intrin) \ +inline _Tpvec func(const _Tpvec& a) \ +{ \ + return _Tpvec(intrin(a.val)); \ +} + +OPENCV_HAL_IMPL_MSA_BASIC_FUNC(v_float32x4, v_abs, msa_absq_f32) +OPENCV_HAL_IMPL_MSA_BASIC_FUNC(v_float64x2, v_abs, msa_absq_f64) +OPENCV_HAL_IMPL_MSA_BASIC_FUNC(v_float32x4, v_sqrt, msa_sqrtq_f32) +OPENCV_HAL_IMPL_MSA_BASIC_FUNC(v_float32x4, v_invsqrt, msa_rsqrtq_f32) +OPENCV_HAL_IMPL_MSA_BASIC_FUNC(v_float64x2, v_sqrt, msa_sqrtq_f64) +OPENCV_HAL_IMPL_MSA_BASIC_FUNC(v_float64x2, v_invsqrt, msa_rsqrtq_f64) + +#define OPENCV_HAL_IMPL_MSA_DBL_BIT_OP(bin_op, intrin) \ +inline v_float64x2 operator bin_op (const v_float64x2& a, const v_float64x2& b) \ +{ \ + return v_float64x2(MSA_TPV_REINTERPRET(v2f64, intrin(MSA_TPV_REINTERPRET(v2i64, a.val), MSA_TPV_REINTERPRET(v2i64, b.val)))); \ +} \ +inline v_float64x2& operator bin_op##= (v_float64x2& a, const v_float64x2& b) \ +{ \ + a.val = MSA_TPV_REINTERPRET(v2f64, intrin(MSA_TPV_REINTERPRET(v2i64, a.val), MSA_TPV_REINTERPRET(v2i64, b.val))); \ + return a; \ +} + +OPENCV_HAL_IMPL_MSA_DBL_BIT_OP(&, msa_andq_s64) +OPENCV_HAL_IMPL_MSA_DBL_BIT_OP(|, msa_orrq_s64) +OPENCV_HAL_IMPL_MSA_DBL_BIT_OP(^, msa_eorq_s64) + +inline v_float64x2 operator ~ (const v_float64x2& a) +{ + return v_float64x2(MSA_TPV_REINTERPRET(v2f64, msa_mvnq_s32(MSA_TPV_REINTERPRET(v4i32, a.val)))); +} + +// TODO: exp, log, sin, cos + +#define OPENCV_HAL_IMPL_MSA_BIN_FUNC(_Tpvec, func, intrin) \ +inline _Tpvec func(const _Tpvec& a, const _Tpvec& b) \ +{ \ + return _Tpvec(intrin(a.val, b.val)); \ +} + +OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_uint8x16, v_min, msa_minq_u8) +OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_uint8x16, v_max, msa_maxq_u8) +OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_int8x16, v_min, msa_minq_s8) +OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_int8x16, v_max, msa_maxq_s8) +OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_uint16x8, v_min, msa_minq_u16) +OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_uint16x8, v_max, msa_maxq_u16) +OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_int16x8, v_min, msa_minq_s16) +OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_int16x8, v_max, msa_maxq_s16) +OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_uint32x4, v_min, msa_minq_u32) +OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_uint32x4, v_max, msa_maxq_u32) +OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_int32x4, v_min, msa_minq_s32) +OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_int32x4, v_max, msa_maxq_s32) +OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_float32x4, v_min, msa_minq_f32) +OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_float32x4, v_max, msa_maxq_f32) +OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_float64x2, v_min, msa_minq_f64) +OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_float64x2, v_max, msa_maxq_f64) + +#define OPENCV_HAL_IMPL_MSA_INT_CMP_OP(_Tpvec, _Tpv, suffix, not_suffix) \ +inline _Tpvec operator == (const _Tpvec& a, const _Tpvec& b) \ +{ return _Tpvec(MSA_TPV_REINTERPRET(_Tpv, msa_ceqq_##suffix(a.val, b.val))); } \ +inline _Tpvec operator != (const _Tpvec& a, const _Tpvec& b) \ +{ return _Tpvec(MSA_TPV_REINTERPRET(_Tpv, msa_mvnq_##not_suffix(msa_ceqq_##suffix(a.val, b.val)))); } \ +inline _Tpvec operator < (const _Tpvec& a, const _Tpvec& b) \ +{ return _Tpvec(MSA_TPV_REINTERPRET(_Tpv, msa_cltq_##suffix(a.val, b.val))); } \ +inline _Tpvec operator > (const _Tpvec& a, const _Tpvec& b) \ +{ return _Tpvec(MSA_TPV_REINTERPRET(_Tpv, msa_cgtq_##suffix(a.val, b.val))); } \ +inline _Tpvec operator <= (const _Tpvec& a, const _Tpvec& b) \ +{ return _Tpvec(MSA_TPV_REINTERPRET(_Tpv, msa_cleq_##suffix(a.val, b.val))); } \ +inline _Tpvec operator >= (const _Tpvec& a, const _Tpvec& b) \ +{ return _Tpvec(MSA_TPV_REINTERPRET(_Tpv, msa_cgeq_##suffix(a.val, b.val))); } + +OPENCV_HAL_IMPL_MSA_INT_CMP_OP(v_uint8x16, v16u8, u8, u8) +OPENCV_HAL_IMPL_MSA_INT_CMP_OP(v_int8x16, v16i8, s8, u8) +OPENCV_HAL_IMPL_MSA_INT_CMP_OP(v_uint16x8, v8u16, u16, u16) +OPENCV_HAL_IMPL_MSA_INT_CMP_OP(v_int16x8, v8i16, s16, u16) +OPENCV_HAL_IMPL_MSA_INT_CMP_OP(v_uint32x4, v4u32, u32, u32) +OPENCV_HAL_IMPL_MSA_INT_CMP_OP(v_int32x4, v4i32, s32, u32) +OPENCV_HAL_IMPL_MSA_INT_CMP_OP(v_float32x4, v4f32, f32, u32) +OPENCV_HAL_IMPL_MSA_INT_CMP_OP(v_uint64x2, v2u64, u64, u64) +OPENCV_HAL_IMPL_MSA_INT_CMP_OP(v_int64x2, v2i64, s64, u64) +OPENCV_HAL_IMPL_MSA_INT_CMP_OP(v_float64x2, v2f64, f64, u64) + +inline v_float32x4 v_not_nan(const v_float32x4& a) +{ return v_float32x4(MSA_TPV_REINTERPRET(v4f32, msa_ceqq_f32(a.val, a.val))); } +inline v_float64x2 v_not_nan(const v_float64x2& a) +{ return v_float64x2(MSA_TPV_REINTERPRET(v2f64, msa_ceqq_f64(a.val, a.val))); } + +OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_uint8x16, v_add_wrap, msa_addq_u8) +OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_int8x16, v_add_wrap, msa_addq_s8) +OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_uint16x8, v_add_wrap, msa_addq_u16) +OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_int16x8, v_add_wrap, msa_addq_s16) +OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_uint8x16, v_sub_wrap, msa_subq_u8) +OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_int8x16, v_sub_wrap, msa_subq_s8) +OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_uint16x8, v_sub_wrap, msa_subq_u16) +OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_int16x8, v_sub_wrap, msa_subq_s16) +OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_uint8x16, v_mul_wrap, msa_mulq_u8) +OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_int8x16, v_mul_wrap, msa_mulq_s8) +OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_uint16x8, v_mul_wrap, msa_mulq_u16) +OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_int16x8, v_mul_wrap, msa_mulq_s16) + +OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_uint8x16, v_absdiff, msa_abdq_u8) +OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_uint16x8, v_absdiff, msa_abdq_u16) +OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_uint32x4, v_absdiff, msa_abdq_u32) +OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_float32x4, v_absdiff, msa_abdq_f32) +OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_float64x2, v_absdiff, msa_abdq_f64) + +/** Saturating absolute difference **/ +OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_int8x16, v_absdiffs, msa_qabdq_s8) +OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_int16x8, v_absdiffs, msa_qabdq_s16) + +#define OPENCV_HAL_IMPL_MSA_BIN_FUNC2(_Tpvec, _Tpvec2, _Tpv, func, intrin) \ +inline _Tpvec2 func(const _Tpvec& a, const _Tpvec& b) \ +{ \ + return _Tpvec2(MSA_TPV_REINTERPRET(_Tpv, intrin(a.val, b.val))); \ +} + +OPENCV_HAL_IMPL_MSA_BIN_FUNC2(v_int8x16, v_uint8x16, v16u8, v_absdiff, msa_abdq_s8) +OPENCV_HAL_IMPL_MSA_BIN_FUNC2(v_int16x8, v_uint16x8, v8u16, v_absdiff, msa_abdq_s16) +OPENCV_HAL_IMPL_MSA_BIN_FUNC2(v_int32x4, v_uint32x4, v4u32, v_absdiff, msa_abdq_s32) + +/* v_magnitude, v_sqr_magnitude, v_fma, v_muladd */ +inline v_float32x4 v_magnitude(const v_float32x4& a, const v_float32x4& b) +{ + v_float32x4 x(msa_mlaq_f32(msa_mulq_f32(a.val, a.val), b.val, b.val)); + return v_sqrt(x); +} + +inline v_float32x4 v_sqr_magnitude(const v_float32x4& a, const v_float32x4& b) +{ + return v_float32x4(msa_mlaq_f32(msa_mulq_f32(a.val, a.val), b.val, b.val)); +} + +inline v_float32x4 v_fma(const v_float32x4& a, const v_float32x4& b, const v_float32x4& c) +{ + return v_float32x4(msa_mlaq_f32(c.val, a.val, b.val)); +} + +inline v_int32x4 v_fma(const v_int32x4& a, const v_int32x4& b, const v_int32x4& c) +{ + return v_int32x4(msa_mlaq_s32(c.val, a.val, b.val)); +} + +inline v_float32x4 v_muladd(const v_float32x4& a, const v_float32x4& b, const v_float32x4& c) +{ + return v_fma(a, b, c); +} + +inline v_int32x4 v_muladd(const v_int32x4& a, const v_int32x4& b, const v_int32x4& c) +{ + return v_fma(a, b, c); +} + +inline v_float64x2 v_magnitude(const v_float64x2& a, const v_float64x2& b) +{ + v_float64x2 x(msa_mlaq_f64(msa_mulq_f64(a.val, a.val), b.val, b.val)); + return v_sqrt(x); +} + +inline v_float64x2 v_sqr_magnitude(const v_float64x2& a, const v_float64x2& b) +{ + return v_float64x2(msa_mlaq_f64(msa_mulq_f64(a.val, a.val), b.val, b.val)); +} + +inline v_float64x2 v_fma(const v_float64x2& a, const v_float64x2& b, const v_float64x2& c) +{ + return v_float64x2(msa_mlaq_f64(c.val, a.val, b.val)); +} + +inline v_float64x2 v_muladd(const v_float64x2& a, const v_float64x2& b, const v_float64x2& c) +{ + return v_fma(a, b, c); +} + +// trade efficiency for convenience +#define OPENCV_HAL_IMPL_MSA_SHIFT_OP(_Tpvec, suffix, _Tps, ssuffix) \ +inline _Tpvec operator << (const _Tpvec& a, int n) \ +{ return _Tpvec(msa_shlq_##suffix(a.val, msa_dupq_n_##ssuffix((_Tps)n))); } \ +inline _Tpvec operator >> (const _Tpvec& a, int n) \ +{ return _Tpvec(msa_shrq_##suffix(a.val, msa_dupq_n_##ssuffix((_Tps)n))); } \ +template inline _Tpvec v_shl(const _Tpvec& a) \ +{ return _Tpvec(msa_shlq_n_##suffix(a.val, n)); } \ +template inline _Tpvec v_shr(const _Tpvec& a) \ +{ return _Tpvec(msa_shrq_n_##suffix(a.val, n)); } \ +template inline _Tpvec v_rshr(const _Tpvec& a) \ +{ return _Tpvec(msa_rshrq_n_##suffix(a.val, n)); } + +OPENCV_HAL_IMPL_MSA_SHIFT_OP(v_uint8x16, u8, schar, s8) +OPENCV_HAL_IMPL_MSA_SHIFT_OP(v_int8x16, s8, schar, s8) +OPENCV_HAL_IMPL_MSA_SHIFT_OP(v_uint16x8, u16, short, s16) +OPENCV_HAL_IMPL_MSA_SHIFT_OP(v_int16x8, s16, short, s16) +OPENCV_HAL_IMPL_MSA_SHIFT_OP(v_uint32x4, u32, int, s32) +OPENCV_HAL_IMPL_MSA_SHIFT_OP(v_int32x4, s32, int, s32) +OPENCV_HAL_IMPL_MSA_SHIFT_OP(v_uint64x2, u64, int64, s64) +OPENCV_HAL_IMPL_MSA_SHIFT_OP(v_int64x2, s64, int64, s64) + +/* v_rotate_right, v_rotate_left */ +#define OPENCV_HAL_IMPL_MSA_ROTATE_OP(_Tpvec, _Tpv, _Tpvs, suffix) \ +template inline _Tpvec v_rotate_right(const _Tpvec& a) \ +{ \ + return _Tpvec(MSA_TPV_REINTERPRET(_Tpv, msa_extq_##suffix(MSA_TPV_REINTERPRET(_Tpvs, a.val), msa_dupq_n_##suffix(0), n))); \ +} \ +template inline _Tpvec v_rotate_left(const _Tpvec& a) \ +{ \ + return _Tpvec(MSA_TPV_REINTERPRET(_Tpv, msa_extq_##suffix(msa_dupq_n_##suffix(0), MSA_TPV_REINTERPRET(_Tpvs, a.val), _Tpvec::nlanes - n))); \ +} \ +template<> inline _Tpvec v_rotate_left<0>(const _Tpvec& a) \ +{ \ + return a; \ +} \ +template inline _Tpvec v_rotate_right(const _Tpvec& a, const _Tpvec& b) \ +{ \ + return _Tpvec(MSA_TPV_REINTERPRET(_Tpv, msa_extq_##suffix(MSA_TPV_REINTERPRET(_Tpvs, a.val), MSA_TPV_REINTERPRET(_Tpvs, b.val), n))); \ +} \ +template inline _Tpvec v_rotate_left(const _Tpvec& a, const _Tpvec& b) \ +{ \ + return _Tpvec(MSA_TPV_REINTERPRET(_Tpv, msa_extq_##suffix(MSA_TPV_REINTERPRET(_Tpvs, b.val), MSA_TPV_REINTERPRET(_Tpvs, a.val), _Tpvec::nlanes - n))); \ +} \ +template<> inline _Tpvec v_rotate_left<0>(const _Tpvec& a, const _Tpvec& b) \ +{ \ + CV_UNUSED(b); \ + return a; \ +} + +OPENCV_HAL_IMPL_MSA_ROTATE_OP(v_uint8x16, v16u8, v16i8, s8) +OPENCV_HAL_IMPL_MSA_ROTATE_OP(v_int8x16, v16i8, v16i8, s8) +OPENCV_HAL_IMPL_MSA_ROTATE_OP(v_uint16x8, v8u16, v8i16, s16) +OPENCV_HAL_IMPL_MSA_ROTATE_OP(v_int16x8, v8i16, v8i16, s16) +OPENCV_HAL_IMPL_MSA_ROTATE_OP(v_uint32x4, v4u32, v4i32, s32) +OPENCV_HAL_IMPL_MSA_ROTATE_OP(v_int32x4, v4i32, v4i32, s32) +OPENCV_HAL_IMPL_MSA_ROTATE_OP(v_float32x4, v4f32, v4i32, s32) +OPENCV_HAL_IMPL_MSA_ROTATE_OP(v_uint64x2, v2u64, v2i64, s64) +OPENCV_HAL_IMPL_MSA_ROTATE_OP(v_int64x2, v2i64, v2i64, s64) +OPENCV_HAL_IMPL_MSA_ROTATE_OP(v_float64x2, v2f64, v2i64, s64) + +#define OPENCV_HAL_IMPL_MSA_LOADSTORE_OP(_Tpvec, _Tp, suffix) \ +inline _Tpvec v_load(const _Tp* ptr) \ +{ return _Tpvec(msa_ld1q_##suffix(ptr)); } \ +inline _Tpvec v_load_aligned(const _Tp* ptr) \ +{ return _Tpvec(msa_ld1q_##suffix(ptr)); } \ +inline _Tpvec v_load_low(const _Tp* ptr) \ +{ return _Tpvec(msa_combine_##suffix(msa_ld1_##suffix(ptr), msa_dup_n_##suffix((_Tp)0))); } \ +inline _Tpvec v_load_halves(const _Tp* ptr0, const _Tp* ptr1) \ +{ return _Tpvec(msa_combine_##suffix(msa_ld1_##suffix(ptr0), msa_ld1_##suffix(ptr1))); } \ +inline void v_store(_Tp* ptr, const _Tpvec& a) \ +{ msa_st1q_##suffix(ptr, a.val); } \ +inline void v_store_aligned(_Tp* ptr, const _Tpvec& a) \ +{ msa_st1q_##suffix(ptr, a.val); } \ +inline void v_store_aligned_nocache(_Tp* ptr, const _Tpvec& a) \ +{ msa_st1q_##suffix(ptr, a.val); } \ +inline void v_store(_Tp* ptr, const _Tpvec& a, hal::StoreMode /*mode*/) \ +{ msa_st1q_##suffix(ptr, a.val); } \ +inline void v_store_low(_Tp* ptr, const _Tpvec& a) \ +{ \ + int n = _Tpvec::nlanes; \ + for( int i = 0; i < (n/2); i++ ) \ + ptr[i] = a.val[i]; \ +} \ +inline void v_store_high(_Tp* ptr, const _Tpvec& a) \ +{ \ + int n = _Tpvec::nlanes; \ + for( int i = 0; i < (n/2); i++ ) \ + ptr[i] = a.val[i+(n/2)]; \ +} + +OPENCV_HAL_IMPL_MSA_LOADSTORE_OP(v_uint8x16, uchar, u8) +OPENCV_HAL_IMPL_MSA_LOADSTORE_OP(v_int8x16, schar, s8) +OPENCV_HAL_IMPL_MSA_LOADSTORE_OP(v_uint16x8, ushort, u16) +OPENCV_HAL_IMPL_MSA_LOADSTORE_OP(v_int16x8, short, s16) +OPENCV_HAL_IMPL_MSA_LOADSTORE_OP(v_uint32x4, unsigned, u32) +OPENCV_HAL_IMPL_MSA_LOADSTORE_OP(v_int32x4, int, s32) +OPENCV_HAL_IMPL_MSA_LOADSTORE_OP(v_uint64x2, uint64, u64) +OPENCV_HAL_IMPL_MSA_LOADSTORE_OP(v_int64x2, int64, s64) +OPENCV_HAL_IMPL_MSA_LOADSTORE_OP(v_float32x4, float, f32) +OPENCV_HAL_IMPL_MSA_LOADSTORE_OP(v_float64x2, double, f64) + + +/** Reverse **/ +inline v_uint8x16 v_reverse(const v_uint8x16 &a) +{ + v_uint8x16 c = v_uint8x16((v16u8)__builtin_msa_vshf_b((v16i8)((v2i64){0x08090A0B0C0D0E0F, 0x0001020304050607}), msa_dupq_n_s8(0), (v16i8)a.val)); + return c; +} + +inline v_int8x16 v_reverse(const v_int8x16 &a) +{ return v_reinterpret_as_s8(v_reverse(v_reinterpret_as_u8(a))); } + +inline v_uint16x8 v_reverse(const v_uint16x8 &a) +{ + v_uint16x8 c = v_uint16x8((v8u16)__builtin_msa_vshf_h((v8i16)((v2i64){0x0004000500060007, 0x0000000100020003}), msa_dupq_n_s16(0), (v8i16)a.val)); + return c; +} + +inline v_int16x8 v_reverse(const v_int16x8 &a) +{ return v_reinterpret_as_s16(v_reverse(v_reinterpret_as_u16(a))); } + +inline v_uint32x4 v_reverse(const v_uint32x4 &a) +{ + v_uint32x4 c; + c.val[0] = a.val[3]; + c.val[1] = a.val[2]; + c.val[2] = a.val[1]; + c.val[3] = a.val[0]; + return c; +} + +inline v_int32x4 v_reverse(const v_int32x4 &a) +{ return v_reinterpret_as_s32(v_reverse(v_reinterpret_as_u32(a))); } + +inline v_float32x4 v_reverse(const v_float32x4 &a) +{ return v_reinterpret_as_f32(v_reverse(v_reinterpret_as_u32(a))); } + +inline v_uint64x2 v_reverse(const v_uint64x2 &a) +{ + v_uint64x2 c; + c.val[0] = a.val[1]; + c.val[1] = a.val[0]; + return c; +} + +inline v_int64x2 v_reverse(const v_int64x2 &a) +{ return v_reinterpret_as_s64(v_reverse(v_reinterpret_as_u64(a))); } + +inline v_float64x2 v_reverse(const v_float64x2 &a) +{ return v_reinterpret_as_f64(v_reverse(v_reinterpret_as_u64(a))); } + + +#define OPENCV_HAL_IMPL_MSA_REDUCE_OP_8U(func, cfunc) \ +inline unsigned short v_reduce_##func(const v_uint16x8& a) \ +{ \ + v8u16 a_lo, a_hi; \ + ILVRL_H2_UH(a.val, msa_dupq_n_u16(0), a_lo, a_hi); \ + v4u32 b = msa_##func##q_u32(msa_paddlq_u16(a_lo), msa_paddlq_u16(a_hi)); \ + v4u32 b_lo, b_hi; \ + ILVRL_W2_UW(b, msa_dupq_n_u32(0), b_lo, b_hi); \ + v2u64 c = msa_##func##q_u64(msa_paddlq_u32(b_lo), msa_paddlq_u32(b_hi)); \ + return (unsigned short)cfunc(c[0], c[1]); \ +} + +OPENCV_HAL_IMPL_MSA_REDUCE_OP_8U(max, std::max) +OPENCV_HAL_IMPL_MSA_REDUCE_OP_8U(min, std::min) + +#define OPENCV_HAL_IMPL_MSA_REDUCE_OP_8S(func, cfunc) \ +inline short v_reduce_##func(const v_int16x8& a) \ +{ \ + v8i16 a_lo, a_hi; \ + ILVRL_H2_SH(a.val, msa_dupq_n_s16(0), a_lo, a_hi); \ + v4i32 b = msa_##func##q_s32(msa_paddlq_s16(a_lo), msa_paddlq_s16(a_hi)); \ + v4i32 b_lo, b_hi; \ + ILVRL_W2_SW(b, msa_dupq_n_s32(0), b_lo, b_hi); \ + v2i64 c = msa_##func##q_s64(msa_paddlq_s32(b_lo), msa_paddlq_s32(b_hi)); \ + return (short)cfunc(c[0], c[1]); \ +} + +OPENCV_HAL_IMPL_MSA_REDUCE_OP_8S(max, std::max) +OPENCV_HAL_IMPL_MSA_REDUCE_OP_8S(min, std::min) + +#define OPENCV_HAL_IMPL_MSA_REDUCE_OP_4(_Tpvec, scalartype, func, cfunc) \ +inline scalartype v_reduce_##func(const _Tpvec& a) \ +{ \ + return (scalartype)cfunc(cfunc(a.val[0], a.val[1]), cfunc(a.val[2], a.val[3])); \ +} + +OPENCV_HAL_IMPL_MSA_REDUCE_OP_4(v_uint32x4, unsigned, max, std::max) +OPENCV_HAL_IMPL_MSA_REDUCE_OP_4(v_uint32x4, unsigned, min, std::min) +OPENCV_HAL_IMPL_MSA_REDUCE_OP_4(v_int32x4, int, max, std::max) +OPENCV_HAL_IMPL_MSA_REDUCE_OP_4(v_int32x4, int, min, std::min) +OPENCV_HAL_IMPL_MSA_REDUCE_OP_4(v_float32x4, float, max, std::max) +OPENCV_HAL_IMPL_MSA_REDUCE_OP_4(v_float32x4, float, min, std::min) + + +#define OPENCV_HAL_IMPL_MSA_REDUCE_OP_16(_Tpvec, scalartype, _Tpvec2, func) \ +inline scalartype v_reduce_##func(const _Tpvec& a) \ +{ \ + _Tpvec2 a1, a2; \ + v_expand(a, a1, a2); \ + return (scalartype)v_reduce_##func(v_##func(a1, a2)); \ +} + +OPENCV_HAL_IMPL_MSA_REDUCE_OP_16(v_uint8x16, uchar, v_uint16x8, min) +OPENCV_HAL_IMPL_MSA_REDUCE_OP_16(v_uint8x16, uchar, v_uint16x8, max) +OPENCV_HAL_IMPL_MSA_REDUCE_OP_16(v_int8x16, char, v_int16x8, min) +OPENCV_HAL_IMPL_MSA_REDUCE_OP_16(v_int8x16, char, v_int16x8, max) + + + +#define OPENCV_HAL_IMPL_MSA_REDUCE_SUM(_Tpvec, scalartype, suffix) \ +inline scalartype v_reduce_sum(const _Tpvec& a) \ +{ \ + return (scalartype)msa_sum_##suffix(a.val); \ +} + +OPENCV_HAL_IMPL_MSA_REDUCE_SUM(v_uint8x16, unsigned char, u8) +OPENCV_HAL_IMPL_MSA_REDUCE_SUM(v_int8x16, char, s8) +OPENCV_HAL_IMPL_MSA_REDUCE_SUM(v_uint16x8, unsigned short, u16) +OPENCV_HAL_IMPL_MSA_REDUCE_SUM(v_int16x8, short, s16) +OPENCV_HAL_IMPL_MSA_REDUCE_SUM(v_uint32x4, unsigned, u32) +OPENCV_HAL_IMPL_MSA_REDUCE_SUM(v_int32x4, int, s32) +OPENCV_HAL_IMPL_MSA_REDUCE_SUM(v_float32x4, float, f32) + +inline uint64 v_reduce_sum(const v_uint64x2& a) +{ return (uint64)(msa_getq_lane_u64(a.val, 0) + msa_getq_lane_u64(a.val, 1)); } +inline int64 v_reduce_sum(const v_int64x2& a) +{ return (int64)(msa_getq_lane_s64(a.val, 0) + msa_getq_lane_s64(a.val, 1)); } +inline double v_reduce_sum(const v_float64x2& a) +{ + return msa_getq_lane_f64(a.val, 0) + msa_getq_lane_f64(a.val, 1); +} + +/* v_reduce_sum4, v_reduce_sad */ +inline v_float32x4 v_reduce_sum4(const v_float32x4& a, const v_float32x4& b, + const v_float32x4& c, const v_float32x4& d) +{ + v4f32 u0 = msa_addq_f32(MSA_TPV_REINTERPRET(v4f32, msa_ilvevq_s32(MSA_TPV_REINTERPRET(v4i32, b.val), MSA_TPV_REINTERPRET(v4i32, a.val))), + MSA_TPV_REINTERPRET(v4f32, msa_ilvodq_s32(MSA_TPV_REINTERPRET(v4i32, b.val), MSA_TPV_REINTERPRET(v4i32, a.val)))); // a0+a1 b0+b1 a2+a3 b2+b3 + v4f32 u1 = msa_addq_f32(MSA_TPV_REINTERPRET(v4f32, msa_ilvevq_s32(MSA_TPV_REINTERPRET(v4i32, d.val), MSA_TPV_REINTERPRET(v4i32, c.val))), + MSA_TPV_REINTERPRET(v4f32, msa_ilvodq_s32(MSA_TPV_REINTERPRET(v4i32, d.val), MSA_TPV_REINTERPRET(v4i32, c.val)))); // c0+c1 d0+d1 c2+c3 d2+d3 + + return v_float32x4(msa_addq_f32(MSA_TPV_REINTERPRET(v4f32, msa_ilvrq_s64(MSA_TPV_REINTERPRET(v2i64, u1), MSA_TPV_REINTERPRET(v2i64, u0))), + MSA_TPV_REINTERPRET(v4f32, msa_ilvlq_s64(MSA_TPV_REINTERPRET(v2i64, u1), MSA_TPV_REINTERPRET(v2i64, u0))))); +} + +inline unsigned v_reduce_sad(const v_uint8x16& a, const v_uint8x16& b) +{ + v16u8 t0 = msa_abdq_u8(a.val, b.val); + v8u16 t1 = msa_paddlq_u8(t0); + v4u32 t2 = msa_paddlq_u16(t1); + return msa_sum_u32(t2); +} +inline unsigned v_reduce_sad(const v_int8x16& a, const v_int8x16& b) +{ + v16u8 t0 = MSA_TPV_REINTERPRET(v16u8, msa_abdq_s8(a.val, b.val)); + v8u16 t1 = msa_paddlq_u8(t0); + v4u32 t2 = msa_paddlq_u16(t1); + return msa_sum_u32(t2); +} +inline unsigned v_reduce_sad(const v_uint16x8& a, const v_uint16x8& b) +{ + v8u16 t0 = msa_abdq_u16(a.val, b.val); + v4u32 t1 = msa_paddlq_u16(t0); + return msa_sum_u32(t1); +} +inline unsigned v_reduce_sad(const v_int16x8& a, const v_int16x8& b) +{ + v8u16 t0 = MSA_TPV_REINTERPRET(v8u16, msa_abdq_s16(a.val, b.val)); + v4u32 t1 = msa_paddlq_u16(t0); + return msa_sum_u32(t1); +} +inline unsigned v_reduce_sad(const v_uint32x4& a, const v_uint32x4& b) +{ + v4u32 t0 = msa_abdq_u32(a.val, b.val); + return msa_sum_u32(t0); +} +inline unsigned v_reduce_sad(const v_int32x4& a, const v_int32x4& b) +{ + v4u32 t0 = MSA_TPV_REINTERPRET(v4u32, msa_abdq_s32(a.val, b.val)); + return msa_sum_u32(t0); +} +inline float v_reduce_sad(const v_float32x4& a, const v_float32x4& b) +{ + v4f32 t0 = msa_abdq_f32(a.val, b.val); + return msa_sum_f32(t0); +} + +/* v_popcount */ +#define OPENCV_HAL_IMPL_MSA_POPCOUNT_SIZE8(_Tpvec) \ +inline v_uint8x16 v_popcount(const _Tpvec& a) \ +{ \ + v16u8 t = MSA_TPV_REINTERPRET(v16u8, msa_cntq_s8(MSA_TPV_REINTERPRET(v16i8, a.val))); \ + return v_uint8x16(t); \ +} +OPENCV_HAL_IMPL_MSA_POPCOUNT_SIZE8(v_uint8x16) +OPENCV_HAL_IMPL_MSA_POPCOUNT_SIZE8(v_int8x16) + +#define OPENCV_HAL_IMPL_MSA_POPCOUNT_SIZE16(_Tpvec) \ +inline v_uint16x8 v_popcount(const _Tpvec& a) \ +{ \ + v8u16 t = MSA_TPV_REINTERPRET(v8u16, msa_cntq_s16(MSA_TPV_REINTERPRET(v8i16, a.val))); \ + return v_uint16x8(t); \ +} +OPENCV_HAL_IMPL_MSA_POPCOUNT_SIZE16(v_uint16x8) +OPENCV_HAL_IMPL_MSA_POPCOUNT_SIZE16(v_int16x8) + +#define OPENCV_HAL_IMPL_MSA_POPCOUNT_SIZE32(_Tpvec) \ +inline v_uint32x4 v_popcount(const _Tpvec& a) \ +{ \ + v4u32 t = MSA_TPV_REINTERPRET(v4u32, msa_cntq_s32(MSA_TPV_REINTERPRET(v4i32, a.val))); \ + return v_uint32x4(t); \ +} +OPENCV_HAL_IMPL_MSA_POPCOUNT_SIZE32(v_uint32x4) +OPENCV_HAL_IMPL_MSA_POPCOUNT_SIZE32(v_int32x4) + +#define OPENCV_HAL_IMPL_MSA_POPCOUNT_SIZE64(_Tpvec) \ +inline v_uint64x2 v_popcount(const _Tpvec& a) \ +{ \ + v2u64 t = MSA_TPV_REINTERPRET(v2u64, msa_cntq_s64(MSA_TPV_REINTERPRET(v2i64, a.val))); \ + return v_uint64x2(t); \ +} +OPENCV_HAL_IMPL_MSA_POPCOUNT_SIZE64(v_uint64x2) +OPENCV_HAL_IMPL_MSA_POPCOUNT_SIZE64(v_int64x2) + +inline int v_signmask(const v_uint8x16& a) +{ + v8i8 m0 = msa_create_s8(CV_BIG_UINT(0x0706050403020100)); + v16u8 v0 = msa_shlq_u8(msa_shrq_n_u8(a.val, 7), msa_combine_s8(m0, m0)); + v8u16 v1 = msa_paddlq_u8(v0); + v4u32 v2 = msa_paddlq_u16(v1); + v2u64 v3 = msa_paddlq_u32(v2); + return (int)msa_getq_lane_u64(v3, 0) + ((int)msa_getq_lane_u64(v3, 1) << 8); +} +inline int v_signmask(const v_int8x16& a) +{ return v_signmask(v_reinterpret_as_u8(a)); } + +inline int v_signmask(const v_uint16x8& a) +{ + v4i16 m0 = msa_create_s16(CV_BIG_UINT(0x0003000200010000)); + v8u16 v0 = msa_shlq_u16(msa_shrq_n_u16(a.val, 15), msa_combine_s16(m0, m0)); + v4u32 v1 = msa_paddlq_u16(v0); + v2u64 v2 = msa_paddlq_u32(v1); + return (int)msa_getq_lane_u64(v2, 0) + ((int)msa_getq_lane_u64(v2, 1) << 4); +} +inline int v_signmask(const v_int16x8& a) +{ return v_signmask(v_reinterpret_as_u16(a)); } + +inline int v_signmask(const v_uint32x4& a) +{ + v2i32 m0 = msa_create_s32(CV_BIG_UINT(0x0000000100000000)); + v4u32 v0 = msa_shlq_u32(msa_shrq_n_u32(a.val, 31), msa_combine_s32(m0, m0)); + v2u64 v1 = msa_paddlq_u32(v0); + return (int)msa_getq_lane_u64(v1, 0) + ((int)msa_getq_lane_u64(v1, 1) << 2); +} +inline int v_signmask(const v_int32x4& a) +{ return v_signmask(v_reinterpret_as_u32(a)); } +inline int v_signmask(const v_float32x4& a) +{ return v_signmask(v_reinterpret_as_u32(a)); } + +inline int v_signmask(const v_uint64x2& a) +{ + v2u64 v0 = msa_shrq_n_u64(a.val, 63); + return (int)msa_getq_lane_u64(v0, 0) + ((int)msa_getq_lane_u64(v0, 1) << 1); +} +inline int v_signmask(const v_int64x2& a) +{ return v_signmask(v_reinterpret_as_u64(a)); } +inline int v_signmask(const v_float64x2& a) +{ return v_signmask(v_reinterpret_as_u64(a)); } + +inline int v_scan_forward(const v_int8x16& a) { return trailingZeros32(v_signmask(a)); } +inline int v_scan_forward(const v_uint8x16& a) { return trailingZeros32(v_signmask(a)); } +inline int v_scan_forward(const v_int16x8& a) { return trailingZeros32(v_signmask(a)); } +inline int v_scan_forward(const v_uint16x8& a) { return trailingZeros32(v_signmask(a)); } +inline int v_scan_forward(const v_int32x4& a) { return trailingZeros32(v_signmask(a)); } +inline int v_scan_forward(const v_uint32x4& a) { return trailingZeros32(v_signmask(a)); } +inline int v_scan_forward(const v_float32x4& a) { return trailingZeros32(v_signmask(a)); } +inline int v_scan_forward(const v_int64x2& a) { return trailingZeros32(v_signmask(a)); } +inline int v_scan_forward(const v_uint64x2& a) { return trailingZeros32(v_signmask(a)); } +inline int v_scan_forward(const v_float64x2& a) { return trailingZeros32(v_signmask(a)); } + +#define OPENCV_HAL_IMPL_MSA_CHECK_ALLANY(_Tpvec, _Tpvec2, suffix, shift) \ +inline bool v_check_all(const v_##_Tpvec& a) \ +{ \ + _Tpvec2 v0 = msa_shrq_n_##suffix(msa_mvnq_##suffix(a.val), shift); \ + v2u64 v1 = MSA_TPV_REINTERPRET(v2u64, v0); \ + return (msa_getq_lane_u64(v1, 0) | msa_getq_lane_u64(v1, 1)) == 0; \ +} \ +inline bool v_check_any(const v_##_Tpvec& a) \ +{ \ + _Tpvec2 v0 = msa_shrq_n_##suffix(a.val, shift); \ + v2u64 v1 = MSA_TPV_REINTERPRET(v2u64, v0); \ + return (msa_getq_lane_u64(v1, 0) | msa_getq_lane_u64(v1, 1)) != 0; \ +} + +OPENCV_HAL_IMPL_MSA_CHECK_ALLANY(uint8x16, v16u8, u8, 7) +OPENCV_HAL_IMPL_MSA_CHECK_ALLANY(uint16x8, v8u16, u16, 15) +OPENCV_HAL_IMPL_MSA_CHECK_ALLANY(uint32x4, v4u32, u32, 31) +OPENCV_HAL_IMPL_MSA_CHECK_ALLANY(uint64x2, v2u64, u64, 63) + +inline bool v_check_all(const v_int8x16& a) +{ return v_check_all(v_reinterpret_as_u8(a)); } +inline bool v_check_all(const v_int16x8& a) +{ return v_check_all(v_reinterpret_as_u16(a)); } +inline bool v_check_all(const v_int32x4& a) +{ return v_check_all(v_reinterpret_as_u32(a)); } +inline bool v_check_all(const v_float32x4& a) +{ return v_check_all(v_reinterpret_as_u32(a)); } + +inline bool v_check_any(const v_int8x16& a) +{ return v_check_any(v_reinterpret_as_u8(a)); } +inline bool v_check_any(const v_int16x8& a) +{ return v_check_any(v_reinterpret_as_u16(a)); } +inline bool v_check_any(const v_int32x4& a) +{ return v_check_any(v_reinterpret_as_u32(a)); } +inline bool v_check_any(const v_float32x4& a) +{ return v_check_any(v_reinterpret_as_u32(a)); } + +inline bool v_check_all(const v_int64x2& a) +{ return v_check_all(v_reinterpret_as_u64(a)); } +inline bool v_check_all(const v_float64x2& a) +{ return v_check_all(v_reinterpret_as_u64(a)); } +inline bool v_check_any(const v_int64x2& a) +{ return v_check_any(v_reinterpret_as_u64(a)); } +inline bool v_check_any(const v_float64x2& a) +{ return v_check_any(v_reinterpret_as_u64(a)); } + +/* v_select */ +#define OPENCV_HAL_IMPL_MSA_SELECT(_Tpvec, _Tpv, _Tpvu) \ +inline _Tpvec v_select(const _Tpvec& mask, const _Tpvec& a, const _Tpvec& b) \ +{ \ + return _Tpvec(MSA_TPV_REINTERPRET(_Tpv, msa_bslq_u8(MSA_TPV_REINTERPRET(_Tpvu, mask.val), \ + MSA_TPV_REINTERPRET(_Tpvu, b.val), MSA_TPV_REINTERPRET(_Tpvu, a.val)))); \ +} + +OPENCV_HAL_IMPL_MSA_SELECT(v_uint8x16, v16u8, v16u8) +OPENCV_HAL_IMPL_MSA_SELECT(v_int8x16, v16i8, v16u8) +OPENCV_HAL_IMPL_MSA_SELECT(v_uint16x8, v8u16, v16u8) +OPENCV_HAL_IMPL_MSA_SELECT(v_int16x8, v8i16, v16u8) +OPENCV_HAL_IMPL_MSA_SELECT(v_uint32x4, v4u32, v16u8) +OPENCV_HAL_IMPL_MSA_SELECT(v_int32x4, v4i32, v16u8) +OPENCV_HAL_IMPL_MSA_SELECT(v_float32x4, v4f32, v16u8) +OPENCV_HAL_IMPL_MSA_SELECT(v_float64x2, v2f64, v16u8) + +#define OPENCV_HAL_IMPL_MSA_EXPAND(_Tpvec, _Tpwvec, _Tp, suffix, ssuffix, _Tpv, _Tpvs) \ +inline void v_expand(const _Tpvec& a, _Tpwvec& b0, _Tpwvec& b1) \ +{ \ + _Tpv a_lo = MSA_TPV_REINTERPRET(_Tpv, msa_ilvrq_##ssuffix(MSA_TPV_REINTERPRET(_Tpvs, a.val), msa_dupq_n_##ssuffix(0))); \ + _Tpv a_hi = MSA_TPV_REINTERPRET(_Tpv, msa_ilvlq_##ssuffix(MSA_TPV_REINTERPRET(_Tpvs, a.val), msa_dupq_n_##ssuffix(0))); \ + b0.val = msa_paddlq_##suffix(a_lo); \ + b1.val = msa_paddlq_##suffix(a_hi); \ +} \ +inline _Tpwvec v_expand_low(const _Tpvec& a) \ +{ \ + _Tpv a_lo = MSA_TPV_REINTERPRET(_Tpv, msa_ilvrq_##ssuffix(MSA_TPV_REINTERPRET(_Tpvs, a.val), msa_dupq_n_##ssuffix(0))); \ + return _Tpwvec(msa_paddlq_##suffix(a_lo)); \ +} \ +inline _Tpwvec v_expand_high(const _Tpvec& a) \ +{ \ + _Tpv a_hi = MSA_TPV_REINTERPRET(_Tpv, msa_ilvlq_##ssuffix(MSA_TPV_REINTERPRET(_Tpvs, a.val), msa_dupq_n_##ssuffix(0))); \ + return _Tpwvec(msa_paddlq_##suffix(a_hi)); \ +} \ +inline _Tpwvec v_load_expand(const _Tp* ptr) \ +{ \ + return _Tpwvec(msa_movl_##suffix(msa_ld1_##suffix(ptr))); \ +} + +OPENCV_HAL_IMPL_MSA_EXPAND(v_uint8x16, v_uint16x8, uchar, u8, s8, v16u8, v16i8) +OPENCV_HAL_IMPL_MSA_EXPAND(v_int8x16, v_int16x8, schar, s8, s8, v16i8, v16i8) +OPENCV_HAL_IMPL_MSA_EXPAND(v_uint16x8, v_uint32x4, ushort, u16, s16, v8u16, v8i16) +OPENCV_HAL_IMPL_MSA_EXPAND(v_int16x8, v_int32x4, short, s16, s16, v8i16, v8i16) +OPENCV_HAL_IMPL_MSA_EXPAND(v_uint32x4, v_uint64x2, uint, u32, s32, v4u32, v4i32) +OPENCV_HAL_IMPL_MSA_EXPAND(v_int32x4, v_int64x2, int, s32, s32, v4i32, v4i32) + +inline v_uint32x4 v_load_expand_q(const uchar* ptr) +{ + return v_uint32x4((v4u32){ptr[0], ptr[1], ptr[2], ptr[3]}); +} + +inline v_int32x4 v_load_expand_q(const schar* ptr) +{ + return v_int32x4((v4i32){ptr[0], ptr[1], ptr[2], ptr[3]}); +} + +/* v_zip, v_combine_low, v_combine_high, v_recombine */ +#define OPENCV_HAL_IMPL_MSA_UNPACKS(_Tpvec, _Tpv, _Tpvs, ssuffix) \ +inline void v_zip(const _Tpvec& a0, const _Tpvec& a1, _Tpvec& b0, _Tpvec& b1) \ +{ \ + b0.val = MSA_TPV_REINTERPRET(_Tpv, msa_ilvrq_##ssuffix(MSA_TPV_REINTERPRET(_Tpvs, a1.val), MSA_TPV_REINTERPRET(_Tpvs, a0.val))); \ + b1.val = MSA_TPV_REINTERPRET(_Tpv, msa_ilvlq_##ssuffix(MSA_TPV_REINTERPRET(_Tpvs, a1.val), MSA_TPV_REINTERPRET(_Tpvs, a0.val))); \ +} \ +inline _Tpvec v_combine_low(const _Tpvec& a, const _Tpvec& b) \ +{ \ + return _Tpvec(MSA_TPV_REINTERPRET(_Tpv, msa_ilvrq_s64(MSA_TPV_REINTERPRET(v2i64, b.val), MSA_TPV_REINTERPRET(v2i64, a.val)))); \ +} \ +inline _Tpvec v_combine_high(const _Tpvec& a, const _Tpvec& b) \ +{ \ + return _Tpvec(MSA_TPV_REINTERPRET(_Tpv, msa_ilvlq_s64(MSA_TPV_REINTERPRET(v2i64, b.val), MSA_TPV_REINTERPRET(v2i64, a.val)))); \ +} \ +inline void v_recombine(const _Tpvec& a, const _Tpvec& b, _Tpvec& c, _Tpvec& d) \ +{ \ + c.val = MSA_TPV_REINTERPRET(_Tpv, msa_ilvrq_s64(MSA_TPV_REINTERPRET(v2i64, b.val), MSA_TPV_REINTERPRET(v2i64, a.val))); \ + d.val = MSA_TPV_REINTERPRET(_Tpv, msa_ilvlq_s64(MSA_TPV_REINTERPRET(v2i64, b.val), MSA_TPV_REINTERPRET(v2i64, a.val))); \ +} + +OPENCV_HAL_IMPL_MSA_UNPACKS(v_uint8x16, v16u8, v16i8, s8) +OPENCV_HAL_IMPL_MSA_UNPACKS(v_int8x16, v16i8, v16i8, s8) +OPENCV_HAL_IMPL_MSA_UNPACKS(v_uint16x8, v8u16, v8i16, s16) +OPENCV_HAL_IMPL_MSA_UNPACKS(v_int16x8, v8i16, v8i16, s16) +OPENCV_HAL_IMPL_MSA_UNPACKS(v_uint32x4, v4u32, v4i32, s32) +OPENCV_HAL_IMPL_MSA_UNPACKS(v_int32x4, v4i32, v4i32, s32) +OPENCV_HAL_IMPL_MSA_UNPACKS(v_float32x4, v4f32, v4i32, s32) +OPENCV_HAL_IMPL_MSA_UNPACKS(v_float64x2, v2f64, v2i64, s64) + +/* v_extract */ +#define OPENCV_HAL_IMPL_MSA_EXTRACT(_Tpvec, _Tpv, _Tpvs, suffix) \ +template \ +inline _Tpvec v_extract(const _Tpvec& a, const _Tpvec& b) \ +{ \ + return _Tpvec(MSA_TPV_REINTERPRET(_Tpv, msa_extq_##suffix(MSA_TPV_REINTERPRET(_Tpvs, a.val), MSA_TPV_REINTERPRET(_Tpvs, b.val), s))); \ +} + +OPENCV_HAL_IMPL_MSA_EXTRACT(v_uint8x16, v16u8, v16i8, s8) +OPENCV_HAL_IMPL_MSA_EXTRACT(v_int8x16, v16i8, v16i8, s8) +OPENCV_HAL_IMPL_MSA_EXTRACT(v_uint16x8, v8u16, v8i16, s16) +OPENCV_HAL_IMPL_MSA_EXTRACT(v_int16x8, v8i16, v8i16, s16) +OPENCV_HAL_IMPL_MSA_EXTRACT(v_uint32x4, v4u32, v4i32, s32) +OPENCV_HAL_IMPL_MSA_EXTRACT(v_int32x4, v4i32, v4i32, s32) +OPENCV_HAL_IMPL_MSA_EXTRACT(v_uint64x2, v2u64, v2i64, s64) +OPENCV_HAL_IMPL_MSA_EXTRACT(v_int64x2, v2i64, v2i64, s64) +OPENCV_HAL_IMPL_MSA_EXTRACT(v_float32x4, v4f32, v4i32, s32) +OPENCV_HAL_IMPL_MSA_EXTRACT(v_float64x2, v2f64, v2i64, s64) + +/* v_round, v_floor, v_ceil, v_trunc */ +inline v_int32x4 v_round(const v_float32x4& a) +{ + return v_int32x4(msa_cvttintq_s32_f32(a.val)); +} + +inline v_int32x4 v_floor(const v_float32x4& a) +{ + v4i32 a1 = msa_cvttintq_s32_f32(a.val); + return v_int32x4(msa_addq_s32(a1, MSA_TPV_REINTERPRET(v4i32, msa_cgtq_f32(msa_cvtfintq_f32_s32(a1), a.val)))); +} + +inline v_int32x4 v_ceil(const v_float32x4& a) +{ + v4i32 a1 = msa_cvttintq_s32_f32(a.val); + return v_int32x4(msa_subq_s32(a1, MSA_TPV_REINTERPRET(v4i32, msa_cgtq_f32(a.val, msa_cvtfintq_f32_s32(a1))))); +} + +inline v_int32x4 v_trunc(const v_float32x4& a) +{ + return v_int32x4(msa_cvttruncq_s32_f32(a.val)); +} + +inline v_int32x4 v_round(const v_float64x2& a) +{ + return v_int32x4(msa_pack_s64(msa_cvttintq_s64_f64(a.val), msa_dupq_n_s64(0))); +} + +inline v_int32x4 v_round(const v_float64x2& a, const v_float64x2& b) +{ + return v_int32x4(msa_pack_s64(msa_cvttintq_s64_f64(a.val), msa_cvttintq_s64_f64(b.val))); +} + +inline v_int32x4 v_floor(const v_float64x2& a) +{ + v2f64 a1 = msa_cvtrintq_f64(a.val); + return v_int32x4(msa_pack_s64(msa_addq_s64(msa_cvttruncq_s64_f64(a1), MSA_TPV_REINTERPRET(v2i64, msa_cgtq_f64(a1, a.val))), msa_dupq_n_s64(0))); +} + +inline v_int32x4 v_ceil(const v_float64x2& a) +{ + v2f64 a1 = msa_cvtrintq_f64(a.val); + return v_int32x4(msa_pack_s64(msa_subq_s64(msa_cvttruncq_s64_f64(a1), MSA_TPV_REINTERPRET(v2i64, msa_cgtq_f64(a.val, a1))), msa_dupq_n_s64(0))); +} + +inline v_int32x4 v_trunc(const v_float64x2& a) +{ + return v_int32x4(msa_pack_s64(msa_cvttruncq_s64_f64(a.val), msa_dupq_n_s64(0))); +} + +#define OPENCV_HAL_IMPL_MSA_TRANSPOSE4x4(_Tpvec, _Tpv, _Tpvs, ssuffix) \ +inline void v_transpose4x4(const _Tpvec& a0, const _Tpvec& a1, \ + const _Tpvec& a2, const _Tpvec& a3, \ + _Tpvec& b0, _Tpvec& b1, \ + _Tpvec& b2, _Tpvec& b3) \ +{ \ + _Tpv t00 = MSA_TPV_REINTERPRET(_Tpv, msa_ilvrq_##ssuffix(MSA_TPV_REINTERPRET(_Tpvs, a1.val), MSA_TPV_REINTERPRET(_Tpvs, a0.val))); \ + _Tpv t01 = MSA_TPV_REINTERPRET(_Tpv, msa_ilvlq_##ssuffix(MSA_TPV_REINTERPRET(_Tpvs, a1.val), MSA_TPV_REINTERPRET(_Tpvs, a0.val))); \ + _Tpv t10 = MSA_TPV_REINTERPRET(_Tpv, msa_ilvrq_##ssuffix(MSA_TPV_REINTERPRET(_Tpvs, a3.val), MSA_TPV_REINTERPRET(_Tpvs, a2.val))); \ + _Tpv t11 = MSA_TPV_REINTERPRET(_Tpv, msa_ilvlq_##ssuffix(MSA_TPV_REINTERPRET(_Tpvs, a3.val), MSA_TPV_REINTERPRET(_Tpvs, a2.val))); \ + b0.val = MSA_TPV_REINTERPRET(_Tpv, msa_ilvrq_s64(MSA_TPV_REINTERPRET(v2i64, t10), MSA_TPV_REINTERPRET(v2i64, t00))); \ + b1.val = MSA_TPV_REINTERPRET(_Tpv, msa_ilvlq_s64(MSA_TPV_REINTERPRET(v2i64, t10), MSA_TPV_REINTERPRET(v2i64, t00))); \ + b2.val = MSA_TPV_REINTERPRET(_Tpv, msa_ilvrq_s64(MSA_TPV_REINTERPRET(v2i64, t11), MSA_TPV_REINTERPRET(v2i64, t01))); \ + b3.val = MSA_TPV_REINTERPRET(_Tpv, msa_ilvlq_s64(MSA_TPV_REINTERPRET(v2i64, t11), MSA_TPV_REINTERPRET(v2i64, t01))); \ +} + +OPENCV_HAL_IMPL_MSA_TRANSPOSE4x4(v_uint32x4, v4u32, v4i32, s32) +OPENCV_HAL_IMPL_MSA_TRANSPOSE4x4(v_int32x4, v4i32, v4i32, s32) +OPENCV_HAL_IMPL_MSA_TRANSPOSE4x4(v_float32x4, v4f32, v4i32, s32) + +#define OPENCV_HAL_IMPL_MSA_INTERLEAVED(_Tpvec, _Tp, suffix) \ +inline void v_load_deinterleave(const _Tp* ptr, v_##_Tpvec& a, v_##_Tpvec& b) \ +{ \ + msa_ld2q_##suffix(ptr, &a.val, &b.val); \ +} \ +inline void v_load_deinterleave(const _Tp* ptr, v_##_Tpvec& a, v_##_Tpvec& b, v_##_Tpvec& c) \ +{ \ + msa_ld3q_##suffix(ptr, &a.val, &b.val, &c.val); \ +} \ +inline void v_load_deinterleave(const _Tp* ptr, v_##_Tpvec& a, v_##_Tpvec& b, \ + v_##_Tpvec& c, v_##_Tpvec& d) \ +{ \ + msa_ld4q_##suffix(ptr, &a.val, &b.val, &c.val, &d.val); \ +} \ +inline void v_store_interleave( _Tp* ptr, const v_##_Tpvec& a, const v_##_Tpvec& b, \ + hal::StoreMode /*mode*/=hal::STORE_UNALIGNED) \ +{ \ + msa_st2q_##suffix(ptr, a.val, b.val); \ +} \ +inline void v_store_interleave( _Tp* ptr, const v_##_Tpvec& a, const v_##_Tpvec& b, \ + const v_##_Tpvec& c, hal::StoreMode /*mode*/=hal::STORE_UNALIGNED) \ +{ \ + msa_st3q_##suffix(ptr, a.val, b.val, c.val); \ +} \ +inline void v_store_interleave( _Tp* ptr, const v_##_Tpvec& a, const v_##_Tpvec& b, \ + const v_##_Tpvec& c, const v_##_Tpvec& d, \ + hal::StoreMode /*mode*/=hal::STORE_UNALIGNED ) \ +{ \ + msa_st4q_##suffix(ptr, a.val, b.val, c.val, d.val); \ +} + +OPENCV_HAL_IMPL_MSA_INTERLEAVED(uint8x16, uchar, u8) +OPENCV_HAL_IMPL_MSA_INTERLEAVED(int8x16, schar, s8) +OPENCV_HAL_IMPL_MSA_INTERLEAVED(uint16x8, ushort, u16) +OPENCV_HAL_IMPL_MSA_INTERLEAVED(int16x8, short, s16) +OPENCV_HAL_IMPL_MSA_INTERLEAVED(uint32x4, unsigned, u32) +OPENCV_HAL_IMPL_MSA_INTERLEAVED(int32x4, int, s32) +OPENCV_HAL_IMPL_MSA_INTERLEAVED(float32x4, float, f32) +OPENCV_HAL_IMPL_MSA_INTERLEAVED(uint64x2, uint64, u64) +OPENCV_HAL_IMPL_MSA_INTERLEAVED(int64x2, int64, s64) +OPENCV_HAL_IMPL_MSA_INTERLEAVED(float64x2, double, f64) + +/* v_cvt_f32, v_cvt_f64, v_cvt_f64_high */ +inline v_float32x4 v_cvt_f32(const v_int32x4& a) +{ + return v_float32x4(msa_cvtfintq_f32_s32(a.val)); +} + +inline v_float32x4 v_cvt_f32(const v_float64x2& a) +{ + return v_float32x4(msa_cvtfq_f32_f64(a.val, msa_dupq_n_f64(0.0f))); +} + +inline v_float32x4 v_cvt_f32(const v_float64x2& a, const v_float64x2& b) +{ + return v_float32x4(msa_cvtfq_f32_f64(a.val, b.val)); +} + +inline v_float64x2 v_cvt_f64(const v_int32x4& a) +{ + return v_float64x2(msa_cvtflq_f64_f32(msa_cvtfintq_f32_s32(a.val))); +} + +inline v_float64x2 v_cvt_f64_high(const v_int32x4& a) +{ + return v_float64x2(msa_cvtfhq_f64_f32(msa_cvtfintq_f32_s32(a.val))); +} + +inline v_float64x2 v_cvt_f64(const v_float32x4& a) +{ + return v_float64x2(msa_cvtflq_f64_f32(a.val)); +} + +inline v_float64x2 v_cvt_f64_high(const v_float32x4& a) +{ + return v_float64x2(msa_cvtfhq_f64_f32(a.val)); +} + +inline v_float64x2 v_cvt_f64(const v_int64x2& a) +{ + return v_float64x2(msa_cvtfintq_f64_s64(a.val)); +} + +////////////// Lookup table access //////////////////// +inline v_int8x16 v_lut(const schar* tab, const int* idx) +{ + schar CV_DECL_ALIGNED(32) elems[16] = + { + tab[idx[ 0]], + tab[idx[ 1]], + tab[idx[ 2]], + tab[idx[ 3]], + tab[idx[ 4]], + tab[idx[ 5]], + tab[idx[ 6]], + tab[idx[ 7]], + tab[idx[ 8]], + tab[idx[ 9]], + tab[idx[10]], + tab[idx[11]], + tab[idx[12]], + tab[idx[13]], + tab[idx[14]], + tab[idx[15]] + }; + return v_int8x16(msa_ld1q_s8(elems)); +} +inline v_int8x16 v_lut_pairs(const schar* tab, const int* idx) +{ + schar CV_DECL_ALIGNED(32) elems[16] = + { + tab[idx[0]], + tab[idx[0] + 1], + tab[idx[1]], + tab[idx[1] + 1], + tab[idx[2]], + tab[idx[2] + 1], + tab[idx[3]], + tab[idx[3] + 1], + tab[idx[4]], + tab[idx[4] + 1], + tab[idx[5]], + tab[idx[5] + 1], + tab[idx[6]], + tab[idx[6] + 1], + tab[idx[7]], + tab[idx[7] + 1] + }; + return v_int8x16(msa_ld1q_s8(elems)); +} +inline v_int8x16 v_lut_quads(const schar* tab, const int* idx) +{ + schar CV_DECL_ALIGNED(32) elems[16] = + { + tab[idx[0]], + tab[idx[0] + 1], + tab[idx[0] + 2], + tab[idx[0] + 3], + tab[idx[1]], + tab[idx[1] + 1], + tab[idx[1] + 2], + tab[idx[1] + 3], + tab[idx[2]], + tab[idx[2] + 1], + tab[idx[2] + 2], + tab[idx[2] + 3], + tab[idx[3]], + tab[idx[3] + 1], + tab[idx[3] + 2], + tab[idx[3] + 3] + }; + return v_int8x16(msa_ld1q_s8(elems)); +} +inline v_uint8x16 v_lut(const uchar* tab, const int* idx) { return v_reinterpret_as_u8(v_lut((schar*)tab, idx)); } +inline v_uint8x16 v_lut_pairs(const uchar* tab, const int* idx) { return v_reinterpret_as_u8(v_lut_pairs((schar*)tab, idx)); } +inline v_uint8x16 v_lut_quads(const uchar* tab, const int* idx) { return v_reinterpret_as_u8(v_lut_quads((schar*)tab, idx)); } + + +inline v_int16x8 v_lut(const short* tab, const int* idx) +{ + short CV_DECL_ALIGNED(32) elems[8] = + { + tab[idx[0]], + tab[idx[1]], + tab[idx[2]], + tab[idx[3]], + tab[idx[4]], + tab[idx[5]], + tab[idx[6]], + tab[idx[7]] + }; + return v_int16x8(msa_ld1q_s16(elems)); +} +inline v_int16x8 v_lut_pairs(const short* tab, const int* idx) +{ + short CV_DECL_ALIGNED(32) elems[8] = + { + tab[idx[0]], + tab[idx[0] + 1], + tab[idx[1]], + tab[idx[1] + 1], + tab[idx[2]], + tab[idx[2] + 1], + tab[idx[3]], + tab[idx[3] + 1] + }; + return v_int16x8(msa_ld1q_s16(elems)); +} +inline v_int16x8 v_lut_quads(const short* tab, const int* idx) +{ + return v_int16x8(msa_combine_s16(msa_ld1_s16(tab + idx[0]), msa_ld1_s16(tab + idx[1]))); +} +inline v_uint16x8 v_lut(const ushort* tab, const int* idx) { return v_reinterpret_as_u16(v_lut((short*)tab, idx)); } +inline v_uint16x8 v_lut_pairs(const ushort* tab, const int* idx) { return v_reinterpret_as_u16(v_lut_pairs((short*)tab, idx)); } +inline v_uint16x8 v_lut_quads(const ushort* tab, const int* idx) { return v_reinterpret_as_u16(v_lut_quads((short*)tab, idx)); } + +inline v_int32x4 v_lut(const int* tab, const int* idx) +{ + int CV_DECL_ALIGNED(32) elems[4] = + { + tab[idx[0]], + tab[idx[1]], + tab[idx[2]], + tab[idx[3]] + }; + return v_int32x4(msa_ld1q_s32(elems)); +} +inline v_int32x4 v_lut_pairs(const int* tab, const int* idx) +{ + return v_int32x4(msa_combine_s32(msa_ld1_s32(tab + idx[0]), msa_ld1_s32(tab + idx[1]))); +} +inline v_int32x4 v_lut_quads(const int* tab, const int* idx) +{ + return v_int32x4(msa_ld1q_s32(tab + idx[0])); +} +inline v_uint32x4 v_lut(const unsigned* tab, const int* idx) { return v_reinterpret_as_u32(v_lut((int*)tab, idx)); } +inline v_uint32x4 v_lut_pairs(const unsigned* tab, const int* idx) { return v_reinterpret_as_u32(v_lut_pairs((int*)tab, idx)); } +inline v_uint32x4 v_lut_quads(const unsigned* tab, const int* idx) { return v_reinterpret_as_u32(v_lut_quads((int*)tab, idx)); } + +inline v_int64x2 v_lut(const int64_t* tab, const int* idx) +{ + return v_int64x2(msa_combine_s64(msa_create_s64(tab[idx[0]]), msa_create_s64(tab[idx[1]]))); +} +inline v_int64x2 v_lut_pairs(const int64_t* tab, const int* idx) +{ + return v_int64x2(msa_ld1q_s64(tab + idx[0])); +} +inline v_uint64x2 v_lut(const uint64_t* tab, const int* idx) { return v_reinterpret_as_u64(v_lut((const int64_t *)tab, idx)); } +inline v_uint64x2 v_lut_pairs(const uint64_t* tab, const int* idx) { return v_reinterpret_as_u64(v_lut_pairs((const int64_t *)tab, idx)); } + +inline v_float32x4 v_lut(const float* tab, const int* idx) +{ + float CV_DECL_ALIGNED(32) elems[4] = + { + tab[idx[0]], + tab[idx[1]], + tab[idx[2]], + tab[idx[3]] + }; + return v_float32x4(msa_ld1q_f32(elems)); +} +inline v_float32x4 v_lut_pairs(const float* tab, const int* idx) +{ + uint64 CV_DECL_ALIGNED(32) elems[2] = + { + *(uint64*)(tab + idx[0]), + *(uint64*)(tab + idx[1]) + }; + return v_float32x4(MSA_TPV_REINTERPRET(v4f32, msa_ld1q_u64(elems))); +} +inline v_float32x4 v_lut_quads(const float* tab, const int* idx) +{ + return v_float32x4(msa_ld1q_f32(tab + idx[0])); +} + +inline v_int32x4 v_lut(const int* tab, const v_int32x4& idxvec) +{ + int CV_DECL_ALIGNED(32) idx[4]; + v_store_aligned(idx, idxvec); + + return v_int32x4(tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]]); +} + +inline v_uint32x4 v_lut(const unsigned* tab, const v_int32x4& idxvec) +{ + unsigned CV_DECL_ALIGNED(32) elems[4] = + { + tab[msa_getq_lane_s32(idxvec.val, 0)], + tab[msa_getq_lane_s32(idxvec.val, 1)], + tab[msa_getq_lane_s32(idxvec.val, 2)], + tab[msa_getq_lane_s32(idxvec.val, 3)] + }; + return v_uint32x4(msa_ld1q_u32(elems)); +} + +inline v_float32x4 v_lut(const float* tab, const v_int32x4& idxvec) +{ + int CV_DECL_ALIGNED(32) idx[4]; + v_store_aligned(idx, idxvec); + + return v_float32x4(tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]]); +} + +inline void v_lut_deinterleave(const float* tab, const v_int32x4& idxvec, v_float32x4& x, v_float32x4& y) +{ + int CV_DECL_ALIGNED(32) idx[4]; + v_store_aligned(idx, idxvec); + + v4f32 xy02 = msa_combine_f32(msa_ld1_f32(tab + idx[0]), msa_ld1_f32(tab + idx[2])); + v4f32 xy13 = msa_combine_f32(msa_ld1_f32(tab + idx[1]), msa_ld1_f32(tab + idx[3])); + x = v_float32x4(MSA_TPV_REINTERPRET(v4f32, msa_ilvevq_s32(MSA_TPV_REINTERPRET(v4i32, xy13), MSA_TPV_REINTERPRET(v4i32, xy02)))); + y = v_float32x4(MSA_TPV_REINTERPRET(v4f32, msa_ilvodq_s32(MSA_TPV_REINTERPRET(v4i32, xy13), MSA_TPV_REINTERPRET(v4i32, xy02)))); +} + +inline v_int8x16 v_interleave_pairs(const v_int8x16& vec) +{ + v_int8x16 c = v_int8x16(__builtin_msa_vshf_b((v16i8)((v2i64){0x0705060403010200, 0x0F0D0E0C0B090A08}), msa_dupq_n_s8(0), vec.val)); + return c; +} +inline v_uint8x16 v_interleave_pairs(const v_uint8x16& vec) +{ return v_reinterpret_as_u8(v_interleave_pairs(v_reinterpret_as_s8(vec))); } +inline v_int8x16 v_interleave_quads(const v_int8x16& vec) +{ + v_int8x16 c = v_int8x16(__builtin_msa_vshf_b((v16i8)((v2i64){0x0703060205010400, 0x0F0B0E0A0D090C08}), msa_dupq_n_s8(0), vec.val)); + return c; +} +inline v_uint8x16 v_interleave_quads(const v_uint8x16& vec) { return v_reinterpret_as_u8(v_interleave_quads(v_reinterpret_as_s8(vec))); } + +inline v_int16x8 v_interleave_pairs(const v_int16x8& vec) +{ + v_int16x8 c = v_int16x8(__builtin_msa_vshf_h((v8i16)((v2i64){0x0003000100020000, 0x0007000500060004}), msa_dupq_n_s16(0), vec.val)); + return c; +} + +inline v_uint16x8 v_interleave_pairs(const v_uint16x8& vec) { return v_reinterpret_as_u16(v_interleave_pairs(v_reinterpret_as_s16(vec))); } + +inline v_int16x8 v_interleave_quads(const v_int16x8& vec) +{ + v_int16x8 c = v_int16x8(__builtin_msa_vshf_h((v8i16)((v2i64){0x0005000100040000, 0x0007000300060002}), msa_dupq_n_s16(0), vec.val)); + return c; +} + +inline v_uint16x8 v_interleave_quads(const v_uint16x8& vec) { return v_reinterpret_as_u16(v_interleave_quads(v_reinterpret_as_s16(vec))); } + +inline v_int32x4 v_interleave_pairs(const v_int32x4& vec) +{ + v_int32x4 c; + c.val[0] = vec.val[0]; + c.val[1] = vec.val[2]; + c.val[2] = vec.val[1]; + c.val[3] = vec.val[3]; + return c; +} + +inline v_uint32x4 v_interleave_pairs(const v_uint32x4& vec) { return v_reinterpret_as_u32(v_interleave_pairs(v_reinterpret_as_s32(vec))); } +inline v_float32x4 v_interleave_pairs(const v_float32x4& vec) { return v_reinterpret_as_f32(v_interleave_pairs(v_reinterpret_as_s32(vec))); } + +inline v_int8x16 v_pack_triplets(const v_int8x16& vec) +{ + v_int8x16 c = v_int8x16(__builtin_msa_vshf_b((v16i8)((v2i64){0x0908060504020100, 0x131211100E0D0C0A}), msa_dupq_n_s8(0), vec.val)); + return c; +} + +inline v_uint8x16 v_pack_triplets(const v_uint8x16& vec) { return v_reinterpret_as_u8(v_pack_triplets(v_reinterpret_as_s8(vec))); } + +inline v_int16x8 v_pack_triplets(const v_int16x8& vec) +{ + v_int16x8 c = v_int16x8(__builtin_msa_vshf_h((v8i16)((v2i64){0x0004000200010000, 0x0009000800060005}), msa_dupq_n_s16(0), vec.val)); + return c; +} + +inline v_uint16x8 v_pack_triplets(const v_uint16x8& vec) { return v_reinterpret_as_u16(v_pack_triplets(v_reinterpret_as_s16(vec))); } +inline v_int32x4 v_pack_triplets(const v_int32x4& vec) { return vec; } +inline v_uint32x4 v_pack_triplets(const v_uint32x4& vec) { return vec; } +inline v_float32x4 v_pack_triplets(const v_float32x4& vec) { return vec; } + +inline v_float64x2 v_lut(const double* tab, const int* idx) +{ + double CV_DECL_ALIGNED(32) elems[2] = + { + tab[idx[0]], + tab[idx[1]] + }; + return v_float64x2(msa_ld1q_f64(elems)); +} + +inline v_float64x2 v_lut_pairs(const double* tab, const int* idx) +{ + return v_float64x2(msa_ld1q_f64(tab + idx[0])); +} + +inline v_float64x2 v_lut(const double* tab, const v_int32x4& idxvec) +{ + int CV_DECL_ALIGNED(32) idx[4]; + v_store_aligned(idx, idxvec); + + return v_float64x2(tab[idx[0]], tab[idx[1]]); +} + +inline void v_lut_deinterleave(const double* tab, const v_int32x4& idxvec, v_float64x2& x, v_float64x2& y) +{ + int CV_DECL_ALIGNED(32) idx[4]; + v_store_aligned(idx, idxvec); + + v2f64 xy0 = msa_ld1q_f64(tab + idx[0]); + v2f64 xy1 = msa_ld1q_f64(tab + idx[1]); + x = v_float64x2(MSA_TPV_REINTERPRET(v2f64, msa_ilvevq_s64(MSA_TPV_REINTERPRET(v2i64, xy1), MSA_TPV_REINTERPRET(v2i64, xy0)))); + y = v_float64x2(MSA_TPV_REINTERPRET(v2f64, msa_ilvodq_s64(MSA_TPV_REINTERPRET(v2i64, xy1), MSA_TPV_REINTERPRET(v2i64, xy0)))); +} + +template +inline typename _Tp::lane_type v_extract_n(const _Tp& a) +{ + return v_rotate_right(a).get0(); +} + +template +inline v_uint32x4 v_broadcast_element(const v_uint32x4& a) +{ + return v_setall_u32(v_extract_n(a)); +} +template +inline v_int32x4 v_broadcast_element(const v_int32x4& a) +{ + return v_setall_s32(v_extract_n(a)); +} +template +inline v_float32x4 v_broadcast_element(const v_float32x4& a) +{ + return v_setall_f32(v_extract_n(a)); +} + +////// FP16 support /////// +#if CV_FP16 +inline v_float32x4 v_load_expand(const float16_t* ptr) +{ +#ifndef msa_ld1_f16 + v4f16 v = (v4f16)msa_ld1_s16((const short*)ptr); +#else + v4f16 v = msa_ld1_f16((const __fp16*)ptr); +#endif + return v_float32x4(msa_cvt_f32_f16(v)); +} + +inline void v_pack_store(float16_t* ptr, const v_float32x4& v) +{ + v4f16 hv = msa_cvt_f16_f32(v.val); + +#ifndef msa_st1_f16 + msa_st1_s16((short*)ptr, (int16x4_t)hv); +#else + msa_st1_f16((__fp16*)ptr, hv); +#endif +} +#else +inline v_float32x4 v_load_expand(const float16_t* ptr) +{ + float buf[4]; + for( int i = 0; i < 4; i++ ) + buf[i] = (float)ptr[i]; + return v_load(buf); +} + +inline void v_pack_store(float16_t* ptr, const v_float32x4& v) +{ + float buf[4]; + v_store(buf, v); + for( int i = 0; i < 4; i++ ) + ptr[i] = (float16_t)buf[i]; +} +#endif + +inline void v_cleanup() {} + +CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END + +//! @endcond + +} + +#endif diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/hal/intrin_neon.hpp b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/hal/intrin_neon.hpp new file mode 100644 index 0000000..280691b --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/hal/intrin_neon.hpp @@ -0,0 +1,2346 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Copyright (C) 2013, OpenCV Foundation, all rights reserved. +// Copyright (C) 2015, Itseez Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef OPENCV_HAL_INTRIN_NEON_HPP +#define OPENCV_HAL_INTRIN_NEON_HPP + +#include +#include "opencv2/core/utility.hpp" + +namespace cv +{ + +//! @cond IGNORED + +CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN + +#define CV_SIMD128 1 +#if defined(__aarch64__) || defined(_M_ARM64) +#define CV_SIMD128_64F 1 +#else +#define CV_SIMD128_64F 0 +#endif + +// TODO +#define CV_NEON_DOT 0 + +//////////// Utils //////////// + +#if CV_SIMD128_64F +#define OPENCV_HAL_IMPL_NEON_UNZIP(_Tpv, _Tpvx2, suffix) \ + inline void _v128_unzip(const _Tpv& a, const _Tpv& b, _Tpv& c, _Tpv& d) \ + { c = vuzp1q_##suffix(a, b); d = vuzp2q_##suffix(a, b); } +#define OPENCV_HAL_IMPL_NEON_UNZIP_L(_Tpv, _Tpvx2, suffix) \ + inline void _v128_unzip(const _Tpv&a, const _Tpv&b, _Tpv& c, _Tpv& d) \ + { c = vuzp1_##suffix(a, b); d = vuzp2_##suffix(a, b); } +#else +#define OPENCV_HAL_IMPL_NEON_UNZIP(_Tpv, _Tpvx2, suffix) \ + inline void _v128_unzip(const _Tpv& a, const _Tpv& b, _Tpv& c, _Tpv& d) \ + { _Tpvx2 ab = vuzpq_##suffix(a, b); c = ab.val[0]; d = ab.val[1]; } +#define OPENCV_HAL_IMPL_NEON_UNZIP_L(_Tpv, _Tpvx2, suffix) \ + inline void _v128_unzip(const _Tpv& a, const _Tpv& b, _Tpv& c, _Tpv& d) \ + { _Tpvx2 ab = vuzp_##suffix(a, b); c = ab.val[0]; d = ab.val[1]; } +#endif + +#if CV_SIMD128_64F +#define OPENCV_HAL_IMPL_NEON_REINTERPRET(_Tpv, suffix) \ + template static inline \ + _Tpv vreinterpretq_##suffix##_f64(T a) { return (_Tpv) a; } \ + template static inline \ + float64x2_t vreinterpretq_f64_##suffix(T a) { return (float64x2_t) a; } +#else +#define OPENCV_HAL_IMPL_NEON_REINTERPRET(_Tpv, suffix) +#endif + +#define OPENCV_HAL_IMPL_NEON_UTILS_SUFFIX(_Tpv, _Tpvl, suffix) \ + OPENCV_HAL_IMPL_NEON_UNZIP(_Tpv##_t, _Tpv##x2_t, suffix) \ + OPENCV_HAL_IMPL_NEON_UNZIP_L(_Tpvl##_t, _Tpvl##x2_t, suffix) \ + OPENCV_HAL_IMPL_NEON_REINTERPRET(_Tpv##_t, suffix) + +#define OPENCV_HAL_IMPL_NEON_UTILS_SUFFIX_I64(_Tpv, _Tpvl, suffix) \ + OPENCV_HAL_IMPL_NEON_REINTERPRET(_Tpv##_t, suffix) + +#define OPENCV_HAL_IMPL_NEON_UTILS_SUFFIX_F64(_Tpv, _Tpvl, suffix) \ + OPENCV_HAL_IMPL_NEON_UNZIP(_Tpv##_t, _Tpv##x2_t, suffix) + +OPENCV_HAL_IMPL_NEON_UTILS_SUFFIX(uint8x16, uint8x8, u8) +OPENCV_HAL_IMPL_NEON_UTILS_SUFFIX(int8x16, int8x8, s8) +OPENCV_HAL_IMPL_NEON_UTILS_SUFFIX(uint16x8, uint16x4, u16) +OPENCV_HAL_IMPL_NEON_UTILS_SUFFIX(int16x8, int16x4, s16) +OPENCV_HAL_IMPL_NEON_UTILS_SUFFIX(uint32x4, uint32x2, u32) +OPENCV_HAL_IMPL_NEON_UTILS_SUFFIX(int32x4, int32x2, s32) +OPENCV_HAL_IMPL_NEON_UTILS_SUFFIX(float32x4, float32x2, f32) +OPENCV_HAL_IMPL_NEON_UTILS_SUFFIX_I64(uint64x2, uint64x1, u64) +OPENCV_HAL_IMPL_NEON_UTILS_SUFFIX_I64(int64x2, int64x1, s64) +#if CV_SIMD128_64F +OPENCV_HAL_IMPL_NEON_UTILS_SUFFIX_F64(float64x2, float64x1,f64) +#endif + +//////////// Types //////////// + +struct v_uint8x16 +{ + typedef uchar lane_type; + enum { nlanes = 16 }; + + v_uint8x16() {} + explicit v_uint8x16(uint8x16_t v) : val(v) {} + v_uint8x16(uchar v0, uchar v1, uchar v2, uchar v3, uchar v4, uchar v5, uchar v6, uchar v7, + uchar v8, uchar v9, uchar v10, uchar v11, uchar v12, uchar v13, uchar v14, uchar v15) + { + uchar v[] = {v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15}; + val = vld1q_u8(v); + } + uchar get0() const + { + return vgetq_lane_u8(val, 0); + } + + uint8x16_t val; +}; + +struct v_int8x16 +{ + typedef schar lane_type; + enum { nlanes = 16 }; + + v_int8x16() {} + explicit v_int8x16(int8x16_t v) : val(v) {} + v_int8x16(schar v0, schar v1, schar v2, schar v3, schar v4, schar v5, schar v6, schar v7, + schar v8, schar v9, schar v10, schar v11, schar v12, schar v13, schar v14, schar v15) + { + schar v[] = {v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15}; + val = vld1q_s8(v); + } + schar get0() const + { + return vgetq_lane_s8(val, 0); + } + + int8x16_t val; +}; + +struct v_uint16x8 +{ + typedef ushort lane_type; + enum { nlanes = 8 }; + + v_uint16x8() {} + explicit v_uint16x8(uint16x8_t v) : val(v) {} + v_uint16x8(ushort v0, ushort v1, ushort v2, ushort v3, ushort v4, ushort v5, ushort v6, ushort v7) + { + ushort v[] = {v0, v1, v2, v3, v4, v5, v6, v7}; + val = vld1q_u16(v); + } + ushort get0() const + { + return vgetq_lane_u16(val, 0); + } + + uint16x8_t val; +}; + +struct v_int16x8 +{ + typedef short lane_type; + enum { nlanes = 8 }; + + v_int16x8() {} + explicit v_int16x8(int16x8_t v) : val(v) {} + v_int16x8(short v0, short v1, short v2, short v3, short v4, short v5, short v6, short v7) + { + short v[] = {v0, v1, v2, v3, v4, v5, v6, v7}; + val = vld1q_s16(v); + } + short get0() const + { + return vgetq_lane_s16(val, 0); + } + + int16x8_t val; +}; + +struct v_uint32x4 +{ + typedef unsigned lane_type; + enum { nlanes = 4 }; + + v_uint32x4() {} + explicit v_uint32x4(uint32x4_t v) : val(v) {} + v_uint32x4(unsigned v0, unsigned v1, unsigned v2, unsigned v3) + { + unsigned v[] = {v0, v1, v2, v3}; + val = vld1q_u32(v); + } + unsigned get0() const + { + return vgetq_lane_u32(val, 0); + } + + uint32x4_t val; +}; + +struct v_int32x4 +{ + typedef int lane_type; + enum { nlanes = 4 }; + + v_int32x4() {} + explicit v_int32x4(int32x4_t v) : val(v) {} + v_int32x4(int v0, int v1, int v2, int v3) + { + int v[] = {v0, v1, v2, v3}; + val = vld1q_s32(v); + } + int get0() const + { + return vgetq_lane_s32(val, 0); + } + int32x4_t val; +}; + +struct v_float32x4 +{ + typedef float lane_type; + enum { nlanes = 4 }; + + v_float32x4() {} + explicit v_float32x4(float32x4_t v) : val(v) {} + v_float32x4(float v0, float v1, float v2, float v3) + { + float v[] = {v0, v1, v2, v3}; + val = vld1q_f32(v); + } + float get0() const + { + return vgetq_lane_f32(val, 0); + } + float32x4_t val; +}; + +struct v_uint64x2 +{ + typedef uint64 lane_type; + enum { nlanes = 2 }; + + v_uint64x2() {} + explicit v_uint64x2(uint64x2_t v) : val(v) {} + v_uint64x2(uint64 v0, uint64 v1) + { + uint64 v[] = {v0, v1}; + val = vld1q_u64(v); + } + uint64 get0() const + { + return vgetq_lane_u64(val, 0); + } + uint64x2_t val; +}; + +struct v_int64x2 +{ + typedef int64 lane_type; + enum { nlanes = 2 }; + + v_int64x2() {} + explicit v_int64x2(int64x2_t v) : val(v) {} + v_int64x2(int64 v0, int64 v1) + { + int64 v[] = {v0, v1}; + val = vld1q_s64(v); + } + int64 get0() const + { + return vgetq_lane_s64(val, 0); + } + int64x2_t val; +}; + +#if CV_SIMD128_64F +struct v_float64x2 +{ + typedef double lane_type; + enum { nlanes = 2 }; + + v_float64x2() {} + explicit v_float64x2(float64x2_t v) : val(v) {} + v_float64x2(double v0, double v1) + { + double v[] = {v0, v1}; + val = vld1q_f64(v); + } + double get0() const + { + return vgetq_lane_f64(val, 0); + } + float64x2_t val; +}; +#endif + +#define OPENCV_HAL_IMPL_NEON_INIT(_Tpv, _Tp, suffix) \ +inline v_##_Tpv v_setzero_##suffix() { return v_##_Tpv(vdupq_n_##suffix((_Tp)0)); } \ +inline v_##_Tpv v_setall_##suffix(_Tp v) { return v_##_Tpv(vdupq_n_##suffix(v)); } \ +inline _Tpv##_t vreinterpretq_##suffix##_##suffix(_Tpv##_t v) { return v; } \ +inline v_uint8x16 v_reinterpret_as_u8(const v_##_Tpv& v) { return v_uint8x16(vreinterpretq_u8_##suffix(v.val)); } \ +inline v_int8x16 v_reinterpret_as_s8(const v_##_Tpv& v) { return v_int8x16(vreinterpretq_s8_##suffix(v.val)); } \ +inline v_uint16x8 v_reinterpret_as_u16(const v_##_Tpv& v) { return v_uint16x8(vreinterpretq_u16_##suffix(v.val)); } \ +inline v_int16x8 v_reinterpret_as_s16(const v_##_Tpv& v) { return v_int16x8(vreinterpretq_s16_##suffix(v.val)); } \ +inline v_uint32x4 v_reinterpret_as_u32(const v_##_Tpv& v) { return v_uint32x4(vreinterpretq_u32_##suffix(v.val)); } \ +inline v_int32x4 v_reinterpret_as_s32(const v_##_Tpv& v) { return v_int32x4(vreinterpretq_s32_##suffix(v.val)); } \ +inline v_uint64x2 v_reinterpret_as_u64(const v_##_Tpv& v) { return v_uint64x2(vreinterpretq_u64_##suffix(v.val)); } \ +inline v_int64x2 v_reinterpret_as_s64(const v_##_Tpv& v) { return v_int64x2(vreinterpretq_s64_##suffix(v.val)); } \ +inline v_float32x4 v_reinterpret_as_f32(const v_##_Tpv& v) { return v_float32x4(vreinterpretq_f32_##suffix(v.val)); } + +OPENCV_HAL_IMPL_NEON_INIT(uint8x16, uchar, u8) +OPENCV_HAL_IMPL_NEON_INIT(int8x16, schar, s8) +OPENCV_HAL_IMPL_NEON_INIT(uint16x8, ushort, u16) +OPENCV_HAL_IMPL_NEON_INIT(int16x8, short, s16) +OPENCV_HAL_IMPL_NEON_INIT(uint32x4, unsigned, u32) +OPENCV_HAL_IMPL_NEON_INIT(int32x4, int, s32) +OPENCV_HAL_IMPL_NEON_INIT(uint64x2, uint64, u64) +OPENCV_HAL_IMPL_NEON_INIT(int64x2, int64, s64) +OPENCV_HAL_IMPL_NEON_INIT(float32x4, float, f32) +#if CV_SIMD128_64F +#define OPENCV_HAL_IMPL_NEON_INIT_64(_Tpv, suffix) \ +inline v_float64x2 v_reinterpret_as_f64(const v_##_Tpv& v) { return v_float64x2(vreinterpretq_f64_##suffix(v.val)); } +OPENCV_HAL_IMPL_NEON_INIT(float64x2, double, f64) +OPENCV_HAL_IMPL_NEON_INIT_64(uint8x16, u8) +OPENCV_HAL_IMPL_NEON_INIT_64(int8x16, s8) +OPENCV_HAL_IMPL_NEON_INIT_64(uint16x8, u16) +OPENCV_HAL_IMPL_NEON_INIT_64(int16x8, s16) +OPENCV_HAL_IMPL_NEON_INIT_64(uint32x4, u32) +OPENCV_HAL_IMPL_NEON_INIT_64(int32x4, s32) +OPENCV_HAL_IMPL_NEON_INIT_64(uint64x2, u64) +OPENCV_HAL_IMPL_NEON_INIT_64(int64x2, s64) +OPENCV_HAL_IMPL_NEON_INIT_64(float32x4, f32) +OPENCV_HAL_IMPL_NEON_INIT_64(float64x2, f64) +#endif + +#define OPENCV_HAL_IMPL_NEON_PACK(_Tpvec, _Tp, hreg, suffix, _Tpwvec, pack, mov, rshr) \ +inline _Tpvec v_##pack(const _Tpwvec& a, const _Tpwvec& b) \ +{ \ + hreg a1 = mov(a.val), b1 = mov(b.val); \ + return _Tpvec(vcombine_##suffix(a1, b1)); \ +} \ +inline void v_##pack##_store(_Tp* ptr, const _Tpwvec& a) \ +{ \ + hreg a1 = mov(a.val); \ + vst1_##suffix(ptr, a1); \ +} \ +template inline \ +_Tpvec v_rshr_##pack(const _Tpwvec& a, const _Tpwvec& b) \ +{ \ + hreg a1 = rshr(a.val, n); \ + hreg b1 = rshr(b.val, n); \ + return _Tpvec(vcombine_##suffix(a1, b1)); \ +} \ +template inline \ +void v_rshr_##pack##_store(_Tp* ptr, const _Tpwvec& a) \ +{ \ + hreg a1 = rshr(a.val, n); \ + vst1_##suffix(ptr, a1); \ +} + +OPENCV_HAL_IMPL_NEON_PACK(v_uint8x16, uchar, uint8x8_t, u8, v_uint16x8, pack, vqmovn_u16, vqrshrn_n_u16) +OPENCV_HAL_IMPL_NEON_PACK(v_int8x16, schar, int8x8_t, s8, v_int16x8, pack, vqmovn_s16, vqrshrn_n_s16) +OPENCV_HAL_IMPL_NEON_PACK(v_uint16x8, ushort, uint16x4_t, u16, v_uint32x4, pack, vqmovn_u32, vqrshrn_n_u32) +OPENCV_HAL_IMPL_NEON_PACK(v_int16x8, short, int16x4_t, s16, v_int32x4, pack, vqmovn_s32, vqrshrn_n_s32) +OPENCV_HAL_IMPL_NEON_PACK(v_uint32x4, unsigned, uint32x2_t, u32, v_uint64x2, pack, vmovn_u64, vrshrn_n_u64) +OPENCV_HAL_IMPL_NEON_PACK(v_int32x4, int, int32x2_t, s32, v_int64x2, pack, vmovn_s64, vrshrn_n_s64) + +OPENCV_HAL_IMPL_NEON_PACK(v_uint8x16, uchar, uint8x8_t, u8, v_int16x8, pack_u, vqmovun_s16, vqrshrun_n_s16) +OPENCV_HAL_IMPL_NEON_PACK(v_uint16x8, ushort, uint16x4_t, u16, v_int32x4, pack_u, vqmovun_s32, vqrshrun_n_s32) + +// pack boolean +inline v_uint8x16 v_pack_b(const v_uint16x8& a, const v_uint16x8& b) +{ + uint8x16_t ab = vcombine_u8(vmovn_u16(a.val), vmovn_u16(b.val)); + return v_uint8x16(ab); +} + +inline v_uint8x16 v_pack_b(const v_uint32x4& a, const v_uint32x4& b, + const v_uint32x4& c, const v_uint32x4& d) +{ + uint16x8_t nab = vcombine_u16(vmovn_u32(a.val), vmovn_u32(b.val)); + uint16x8_t ncd = vcombine_u16(vmovn_u32(c.val), vmovn_u32(d.val)); + return v_uint8x16(vcombine_u8(vmovn_u16(nab), vmovn_u16(ncd))); +} + +inline v_uint8x16 v_pack_b(const v_uint64x2& a, const v_uint64x2& b, const v_uint64x2& c, + const v_uint64x2& d, const v_uint64x2& e, const v_uint64x2& f, + const v_uint64x2& g, const v_uint64x2& h) +{ + uint32x4_t ab = vcombine_u32(vmovn_u64(a.val), vmovn_u64(b.val)); + uint32x4_t cd = vcombine_u32(vmovn_u64(c.val), vmovn_u64(d.val)); + uint32x4_t ef = vcombine_u32(vmovn_u64(e.val), vmovn_u64(f.val)); + uint32x4_t gh = vcombine_u32(vmovn_u64(g.val), vmovn_u64(h.val)); + + uint16x8_t abcd = vcombine_u16(vmovn_u32(ab), vmovn_u32(cd)); + uint16x8_t efgh = vcombine_u16(vmovn_u32(ef), vmovn_u32(gh)); + return v_uint8x16(vcombine_u8(vmovn_u16(abcd), vmovn_u16(efgh))); +} + +inline v_float32x4 v_matmul(const v_float32x4& v, const v_float32x4& m0, + const v_float32x4& m1, const v_float32x4& m2, + const v_float32x4& m3) +{ + float32x2_t vl = vget_low_f32(v.val), vh = vget_high_f32(v.val); + float32x4_t res = vmulq_lane_f32(m0.val, vl, 0); + res = vmlaq_lane_f32(res, m1.val, vl, 1); + res = vmlaq_lane_f32(res, m2.val, vh, 0); + res = vmlaq_lane_f32(res, m3.val, vh, 1); + return v_float32x4(res); +} + +inline v_float32x4 v_matmuladd(const v_float32x4& v, const v_float32x4& m0, + const v_float32x4& m1, const v_float32x4& m2, + const v_float32x4& a) +{ + float32x2_t vl = vget_low_f32(v.val), vh = vget_high_f32(v.val); + float32x4_t res = vmulq_lane_f32(m0.val, vl, 0); + res = vmlaq_lane_f32(res, m1.val, vl, 1); + res = vmlaq_lane_f32(res, m2.val, vh, 0); + res = vaddq_f32(res, a.val); + return v_float32x4(res); +} + +#define OPENCV_HAL_IMPL_NEON_BIN_OP(bin_op, _Tpvec, intrin) \ +inline _Tpvec operator bin_op (const _Tpvec& a, const _Tpvec& b) \ +{ \ + return _Tpvec(intrin(a.val, b.val)); \ +} \ +inline _Tpvec& operator bin_op##= (_Tpvec& a, const _Tpvec& b) \ +{ \ + a.val = intrin(a.val, b.val); \ + return a; \ +} + +OPENCV_HAL_IMPL_NEON_BIN_OP(+, v_uint8x16, vqaddq_u8) +OPENCV_HAL_IMPL_NEON_BIN_OP(-, v_uint8x16, vqsubq_u8) +OPENCV_HAL_IMPL_NEON_BIN_OP(+, v_int8x16, vqaddq_s8) +OPENCV_HAL_IMPL_NEON_BIN_OP(-, v_int8x16, vqsubq_s8) +OPENCV_HAL_IMPL_NEON_BIN_OP(+, v_uint16x8, vqaddq_u16) +OPENCV_HAL_IMPL_NEON_BIN_OP(-, v_uint16x8, vqsubq_u16) +OPENCV_HAL_IMPL_NEON_BIN_OP(+, v_int16x8, vqaddq_s16) +OPENCV_HAL_IMPL_NEON_BIN_OP(-, v_int16x8, vqsubq_s16) +OPENCV_HAL_IMPL_NEON_BIN_OP(+, v_int32x4, vaddq_s32) +OPENCV_HAL_IMPL_NEON_BIN_OP(-, v_int32x4, vsubq_s32) +OPENCV_HAL_IMPL_NEON_BIN_OP(*, v_int32x4, vmulq_s32) +OPENCV_HAL_IMPL_NEON_BIN_OP(+, v_uint32x4, vaddq_u32) +OPENCV_HAL_IMPL_NEON_BIN_OP(-, v_uint32x4, vsubq_u32) +OPENCV_HAL_IMPL_NEON_BIN_OP(*, v_uint32x4, vmulq_u32) +OPENCV_HAL_IMPL_NEON_BIN_OP(+, v_float32x4, vaddq_f32) +OPENCV_HAL_IMPL_NEON_BIN_OP(-, v_float32x4, vsubq_f32) +OPENCV_HAL_IMPL_NEON_BIN_OP(*, v_float32x4, vmulq_f32) +OPENCV_HAL_IMPL_NEON_BIN_OP(+, v_int64x2, vaddq_s64) +OPENCV_HAL_IMPL_NEON_BIN_OP(-, v_int64x2, vsubq_s64) +OPENCV_HAL_IMPL_NEON_BIN_OP(+, v_uint64x2, vaddq_u64) +OPENCV_HAL_IMPL_NEON_BIN_OP(-, v_uint64x2, vsubq_u64) +#if CV_SIMD128_64F +OPENCV_HAL_IMPL_NEON_BIN_OP(/, v_float32x4, vdivq_f32) +OPENCV_HAL_IMPL_NEON_BIN_OP(+, v_float64x2, vaddq_f64) +OPENCV_HAL_IMPL_NEON_BIN_OP(-, v_float64x2, vsubq_f64) +OPENCV_HAL_IMPL_NEON_BIN_OP(*, v_float64x2, vmulq_f64) +OPENCV_HAL_IMPL_NEON_BIN_OP(/, v_float64x2, vdivq_f64) +#else +inline v_float32x4 operator / (const v_float32x4& a, const v_float32x4& b) +{ + float32x4_t reciprocal = vrecpeq_f32(b.val); + reciprocal = vmulq_f32(vrecpsq_f32(b.val, reciprocal), reciprocal); + reciprocal = vmulq_f32(vrecpsq_f32(b.val, reciprocal), reciprocal); + return v_float32x4(vmulq_f32(a.val, reciprocal)); +} +inline v_float32x4& operator /= (v_float32x4& a, const v_float32x4& b) +{ + float32x4_t reciprocal = vrecpeq_f32(b.val); + reciprocal = vmulq_f32(vrecpsq_f32(b.val, reciprocal), reciprocal); + reciprocal = vmulq_f32(vrecpsq_f32(b.val, reciprocal), reciprocal); + a.val = vmulq_f32(a.val, reciprocal); + return a; +} +#endif + +// saturating multiply 8-bit, 16-bit +#define OPENCV_HAL_IMPL_NEON_MUL_SAT(_Tpvec, _Tpwvec) \ + inline _Tpvec operator * (const _Tpvec& a, const _Tpvec& b) \ + { \ + _Tpwvec c, d; \ + v_mul_expand(a, b, c, d); \ + return v_pack(c, d); \ + } \ + inline _Tpvec& operator *= (_Tpvec& a, const _Tpvec& b) \ + { a = a * b; return a; } + +OPENCV_HAL_IMPL_NEON_MUL_SAT(v_int8x16, v_int16x8) +OPENCV_HAL_IMPL_NEON_MUL_SAT(v_uint8x16, v_uint16x8) +OPENCV_HAL_IMPL_NEON_MUL_SAT(v_int16x8, v_int32x4) +OPENCV_HAL_IMPL_NEON_MUL_SAT(v_uint16x8, v_uint32x4) + +// Multiply and expand +inline void v_mul_expand(const v_int8x16& a, const v_int8x16& b, + v_int16x8& c, v_int16x8& d) +{ + c.val = vmull_s8(vget_low_s8(a.val), vget_low_s8(b.val)); + d.val = vmull_s8(vget_high_s8(a.val), vget_high_s8(b.val)); +} + +inline void v_mul_expand(const v_uint8x16& a, const v_uint8x16& b, + v_uint16x8& c, v_uint16x8& d) +{ + c.val = vmull_u8(vget_low_u8(a.val), vget_low_u8(b.val)); + d.val = vmull_u8(vget_high_u8(a.val), vget_high_u8(b.val)); +} + +inline void v_mul_expand(const v_int16x8& a, const v_int16x8& b, + v_int32x4& c, v_int32x4& d) +{ + c.val = vmull_s16(vget_low_s16(a.val), vget_low_s16(b.val)); + d.val = vmull_s16(vget_high_s16(a.val), vget_high_s16(b.val)); +} + +inline void v_mul_expand(const v_uint16x8& a, const v_uint16x8& b, + v_uint32x4& c, v_uint32x4& d) +{ + c.val = vmull_u16(vget_low_u16(a.val), vget_low_u16(b.val)); + d.val = vmull_u16(vget_high_u16(a.val), vget_high_u16(b.val)); +} + +inline void v_mul_expand(const v_uint32x4& a, const v_uint32x4& b, + v_uint64x2& c, v_uint64x2& d) +{ + c.val = vmull_u32(vget_low_u32(a.val), vget_low_u32(b.val)); + d.val = vmull_u32(vget_high_u32(a.val), vget_high_u32(b.val)); +} + +inline v_int16x8 v_mul_hi(const v_int16x8& a, const v_int16x8& b) +{ + return v_int16x8(vcombine_s16( + vshrn_n_s32(vmull_s16( vget_low_s16(a.val), vget_low_s16(b.val)), 16), + vshrn_n_s32(vmull_s16(vget_high_s16(a.val), vget_high_s16(b.val)), 16) + )); +} +inline v_uint16x8 v_mul_hi(const v_uint16x8& a, const v_uint16x8& b) +{ + return v_uint16x8(vcombine_u16( + vshrn_n_u32(vmull_u16( vget_low_u16(a.val), vget_low_u16(b.val)), 16), + vshrn_n_u32(vmull_u16(vget_high_u16(a.val), vget_high_u16(b.val)), 16) + )); +} + +//////// Dot Product //////// + +// 16 >> 32 +inline v_int32x4 v_dotprod(const v_int16x8& a, const v_int16x8& b) +{ + int16x8_t uzp1, uzp2; + _v128_unzip(a.val, b.val, uzp1, uzp2); + int16x4_t a0 = vget_low_s16(uzp1); + int16x4_t b0 = vget_high_s16(uzp1); + int16x4_t a1 = vget_low_s16(uzp2); + int16x4_t b1 = vget_high_s16(uzp2); + int32x4_t p = vmull_s16(a0, b0); + return v_int32x4(vmlal_s16(p, a1, b1)); +} +inline v_int32x4 v_dotprod(const v_int16x8& a, const v_int16x8& b, const v_int32x4& c) +{ + int16x8_t uzp1, uzp2; + _v128_unzip(a.val, b.val, uzp1, uzp2); + int16x4_t a0 = vget_low_s16(uzp1); + int16x4_t b0 = vget_high_s16(uzp1); + int16x4_t a1 = vget_low_s16(uzp2); + int16x4_t b1 = vget_high_s16(uzp2); + int32x4_t p = vmlal_s16(c.val, a0, b0); + return v_int32x4(vmlal_s16(p, a1, b1)); +} + +// 32 >> 64 +inline v_int64x2 v_dotprod(const v_int32x4& a, const v_int32x4& b) +{ + int32x4_t uzp1, uzp2; + _v128_unzip(a.val, b.val, uzp1, uzp2); + int32x2_t a0 = vget_low_s32(uzp1); + int32x2_t b0 = vget_high_s32(uzp1); + int32x2_t a1 = vget_low_s32(uzp2); + int32x2_t b1 = vget_high_s32(uzp2); + int64x2_t p = vmull_s32(a0, b0); + return v_int64x2(vmlal_s32(p, a1, b1)); +} +inline v_int64x2 v_dotprod(const v_int32x4& a, const v_int32x4& b, const v_int64x2& c) +{ + int32x4_t uzp1, uzp2; + _v128_unzip(a.val, b.val, uzp1, uzp2); + int32x2_t a0 = vget_low_s32(uzp1); + int32x2_t b0 = vget_high_s32(uzp1); + int32x2_t a1 = vget_low_s32(uzp2); + int32x2_t b1 = vget_high_s32(uzp2); + int64x2_t p = vmlal_s32(c.val, a0, b0); + return v_int64x2(vmlal_s32(p, a1, b1)); +} + +// 8 >> 32 +inline v_uint32x4 v_dotprod_expand(const v_uint8x16& a, const v_uint8x16& b) +{ +#if CV_NEON_DOT + return v_uint32x4(vdotq_u32(vdupq_n_u32(0), a.val, b.val)); +#else + const uint8x16_t zero = vreinterpretq_u8_u32(vdupq_n_u32(0)); + const uint8x16_t mask = vreinterpretq_u8_u32(vdupq_n_u32(0x00FF00FF)); + const uint16x8_t zero32 = vreinterpretq_u16_u32(vdupq_n_u32(0)); + const uint16x8_t mask32 = vreinterpretq_u16_u32(vdupq_n_u32(0x0000FFFF)); + + uint16x8_t even = vmulq_u16(vreinterpretq_u16_u8(vbslq_u8(mask, a.val, zero)), + vreinterpretq_u16_u8(vbslq_u8(mask, b.val, zero))); + uint16x8_t odd = vmulq_u16(vshrq_n_u16(vreinterpretq_u16_u8(a.val), 8), + vshrq_n_u16(vreinterpretq_u16_u8(b.val), 8)); + + uint32x4_t s0 = vaddq_u32(vreinterpretq_u32_u16(vbslq_u16(mask32, even, zero32)), + vreinterpretq_u32_u16(vbslq_u16(mask32, odd, zero32))); + uint32x4_t s1 = vaddq_u32(vshrq_n_u32(vreinterpretq_u32_u16(even), 16), + vshrq_n_u32(vreinterpretq_u32_u16(odd), 16)); + return v_uint32x4(vaddq_u32(s0, s1)); +#endif +} +inline v_uint32x4 v_dotprod_expand(const v_uint8x16& a, const v_uint8x16& b, + const v_uint32x4& c) +{ +#if CV_NEON_DOT + return v_uint32x4(vdotq_u32(c.val, a.val, b.val)); +#else + return v_dotprod_expand(a, b) + c; +#endif +} + +inline v_int32x4 v_dotprod_expand(const v_int8x16& a, const v_int8x16& b) +{ +#if CV_NEON_DOT + return v_int32x4(vdotq_s32(vdupq_n_s32(0), a.val, b.val)); +#else + int16x8_t p0 = vmull_s8(vget_low_s8(a.val), vget_low_s8(b.val)); + int16x8_t p1 = vmull_s8(vget_high_s8(a.val), vget_high_s8(b.val)); + int16x8_t uzp1, uzp2; + _v128_unzip(p0, p1, uzp1, uzp2); + int16x8_t sum = vaddq_s16(uzp1, uzp2); + int16x4_t uzpl1, uzpl2; + _v128_unzip(vget_low_s16(sum), vget_high_s16(sum), uzpl1, uzpl2); + return v_int32x4(vaddl_s16(uzpl1, uzpl2)); +#endif +} +inline v_int32x4 v_dotprod_expand(const v_int8x16& a, const v_int8x16& b, + const v_int32x4& c) +{ +#if CV_NEON_DOT + return v_int32x4(vdotq_s32(c.val, a.val, b.val)); +#else + return v_dotprod_expand(a, b) + c; +#endif +} + +// 16 >> 64 +inline v_uint64x2 v_dotprod_expand(const v_uint16x8& a, const v_uint16x8& b) +{ + const uint16x8_t zero = vreinterpretq_u16_u32(vdupq_n_u32(0)); + const uint16x8_t mask = vreinterpretq_u16_u32(vdupq_n_u32(0x0000FFFF)); + + uint32x4_t even = vmulq_u32(vreinterpretq_u32_u16(vbslq_u16(mask, a.val, zero)), + vreinterpretq_u32_u16(vbslq_u16(mask, b.val, zero))); + uint32x4_t odd = vmulq_u32(vshrq_n_u32(vreinterpretq_u32_u16(a.val), 16), + vshrq_n_u32(vreinterpretq_u32_u16(b.val), 16)); + uint32x4_t uzp1, uzp2; + _v128_unzip(even, odd, uzp1, uzp2); + uint64x2_t s0 = vaddl_u32(vget_low_u32(uzp1), vget_high_u32(uzp1)); + uint64x2_t s1 = vaddl_u32(vget_low_u32(uzp2), vget_high_u32(uzp2)); + return v_uint64x2(vaddq_u64(s0, s1)); +} +inline v_uint64x2 v_dotprod_expand(const v_uint16x8& a, const v_uint16x8& b, const v_uint64x2& c) +{ return v_dotprod_expand(a, b) + c; } + +inline v_int64x2 v_dotprod_expand(const v_int16x8& a, const v_int16x8& b) +{ + int32x4_t p0 = vmull_s16(vget_low_s16(a.val), vget_low_s16(b.val)); + int32x4_t p1 = vmull_s16(vget_high_s16(a.val), vget_high_s16(b.val)); + + int32x4_t uzp1, uzp2; + _v128_unzip(p0, p1, uzp1, uzp2); + int32x4_t sum = vaddq_s32(uzp1, uzp2); + + int32x2_t uzpl1, uzpl2; + _v128_unzip(vget_low_s32(sum), vget_high_s32(sum), uzpl1, uzpl2); + return v_int64x2(vaddl_s32(uzpl1, uzpl2)); +} +inline v_int64x2 v_dotprod_expand(const v_int16x8& a, const v_int16x8& b, + const v_int64x2& c) +{ return v_dotprod_expand(a, b) + c; } + +// 32 >> 64f +#if CV_SIMD128_64F +inline v_float64x2 v_dotprod_expand(const v_int32x4& a, const v_int32x4& b) +{ return v_cvt_f64(v_dotprod(a, b)); } +inline v_float64x2 v_dotprod_expand(const v_int32x4& a, const v_int32x4& b, + const v_float64x2& c) +{ return v_dotprod_expand(a, b) + c; } +#endif + +//////// Fast Dot Product //////// + +// 16 >> 32 +inline v_int32x4 v_dotprod_fast(const v_int16x8& a, const v_int16x8& b) +{ + int16x4_t a0 = vget_low_s16(a.val); + int16x4_t a1 = vget_high_s16(a.val); + int16x4_t b0 = vget_low_s16(b.val); + int16x4_t b1 = vget_high_s16(b.val); + int32x4_t p = vmull_s16(a0, b0); + return v_int32x4(vmlal_s16(p, a1, b1)); +} +inline v_int32x4 v_dotprod_fast(const v_int16x8& a, const v_int16x8& b, const v_int32x4& c) +{ + int16x4_t a0 = vget_low_s16(a.val); + int16x4_t a1 = vget_high_s16(a.val); + int16x4_t b0 = vget_low_s16(b.val); + int16x4_t b1 = vget_high_s16(b.val); + int32x4_t p = vmlal_s16(c.val, a0, b0); + return v_int32x4(vmlal_s16(p, a1, b1)); +} + +// 32 >> 64 +inline v_int64x2 v_dotprod_fast(const v_int32x4& a, const v_int32x4& b) +{ + int32x2_t a0 = vget_low_s32(a.val); + int32x2_t a1 = vget_high_s32(a.val); + int32x2_t b0 = vget_low_s32(b.val); + int32x2_t b1 = vget_high_s32(b.val); + int64x2_t p = vmull_s32(a0, b0); + return v_int64x2(vmlal_s32(p, a1, b1)); +} +inline v_int64x2 v_dotprod_fast(const v_int32x4& a, const v_int32x4& b, const v_int64x2& c) +{ + int32x2_t a0 = vget_low_s32(a.val); + int32x2_t a1 = vget_high_s32(a.val); + int32x2_t b0 = vget_low_s32(b.val); + int32x2_t b1 = vget_high_s32(b.val); + int64x2_t p = vmlal_s32(c.val, a0, b0); + return v_int64x2(vmlal_s32(p, a1, b1)); +} + +// 8 >> 32 +inline v_uint32x4 v_dotprod_expand_fast(const v_uint8x16& a, const v_uint8x16& b) +{ +#if CV_NEON_DOT + return v_uint32x4(vdotq_u32(vdupq_n_u32(0), a.val, b.val)); +#else + uint16x8_t p0 = vmull_u8(vget_low_u8(a.val), vget_low_u8(b.val)); + uint16x8_t p1 = vmull_u8(vget_high_u8(a.val), vget_high_u8(b.val)); + uint32x4_t s0 = vaddl_u16(vget_low_u16(p0), vget_low_u16(p1)); + uint32x4_t s1 = vaddl_u16(vget_high_u16(p0), vget_high_u16(p1)); + return v_uint32x4(vaddq_u32(s0, s1)); +#endif +} +inline v_uint32x4 v_dotprod_expand_fast(const v_uint8x16& a, const v_uint8x16& b, const v_uint32x4& c) +{ +#if CV_NEON_DOT + return v_uint32x4(vdotq_u32(c.val, a.val, b.val)); +#else + return v_dotprod_expand_fast(a, b) + c; +#endif +} + +inline v_int32x4 v_dotprod_expand_fast(const v_int8x16& a, const v_int8x16& b) +{ +#if CV_NEON_DOT + return v_int32x4(vdotq_s32(vdupq_n_s32(0), a.val, b.val)); +#else + int16x8_t prod = vmull_s8(vget_low_s8(a.val), vget_low_s8(b.val)); + prod = vmlal_s8(prod, vget_high_s8(a.val), vget_high_s8(b.val)); + return v_int32x4(vaddl_s16(vget_low_s16(prod), vget_high_s16(prod))); +#endif +} +inline v_int32x4 v_dotprod_expand_fast(const v_int8x16& a, const v_int8x16& b, const v_int32x4& c) +{ +#if CV_NEON_DOT + return v_int32x4(vdotq_s32(c.val, a.val, b.val)); +#else + return v_dotprod_expand_fast(a, b) + c; +#endif +} + +// 16 >> 64 +inline v_uint64x2 v_dotprod_expand_fast(const v_uint16x8& a, const v_uint16x8& b) +{ + uint32x4_t p0 = vmull_u16(vget_low_u16(a.val), vget_low_u16(b.val)); + uint32x4_t p1 = vmull_u16(vget_high_u16(a.val), vget_high_u16(b.val)); + uint64x2_t s0 = vaddl_u32(vget_low_u32(p0), vget_high_u32(p0)); + uint64x2_t s1 = vaddl_u32(vget_low_u32(p1), vget_high_u32(p1)); + return v_uint64x2(vaddq_u64(s0, s1)); +} +inline v_uint64x2 v_dotprod_expand_fast(const v_uint16x8& a, const v_uint16x8& b, const v_uint64x2& c) +{ return v_dotprod_expand_fast(a, b) + c; } + +inline v_int64x2 v_dotprod_expand_fast(const v_int16x8& a, const v_int16x8& b) +{ + int32x4_t prod = vmull_s16(vget_low_s16(a.val), vget_low_s16(b.val)); + prod = vmlal_s16(prod, vget_high_s16(a.val), vget_high_s16(b.val)); + return v_int64x2(vaddl_s32(vget_low_s32(prod), vget_high_s32(prod))); +} +inline v_int64x2 v_dotprod_expand_fast(const v_int16x8& a, const v_int16x8& b, const v_int64x2& c) +{ return v_dotprod_expand_fast(a, b) + c; } + +// 32 >> 64f +#if CV_SIMD128_64F +inline v_float64x2 v_dotprod_expand_fast(const v_int32x4& a, const v_int32x4& b) +{ return v_cvt_f64(v_dotprod_fast(a, b)); } +inline v_float64x2 v_dotprod_expand_fast(const v_int32x4& a, const v_int32x4& b, const v_float64x2& c) +{ return v_dotprod_expand_fast(a, b) + c; } +#endif + + +#define OPENCV_HAL_IMPL_NEON_LOGIC_OP(_Tpvec, suffix) \ + OPENCV_HAL_IMPL_NEON_BIN_OP(&, _Tpvec, vandq_##suffix) \ + OPENCV_HAL_IMPL_NEON_BIN_OP(|, _Tpvec, vorrq_##suffix) \ + OPENCV_HAL_IMPL_NEON_BIN_OP(^, _Tpvec, veorq_##suffix) \ + inline _Tpvec operator ~ (const _Tpvec& a) \ + { \ + return _Tpvec(vreinterpretq_##suffix##_u8(vmvnq_u8(vreinterpretq_u8_##suffix(a.val)))); \ + } + +OPENCV_HAL_IMPL_NEON_LOGIC_OP(v_uint8x16, u8) +OPENCV_HAL_IMPL_NEON_LOGIC_OP(v_int8x16, s8) +OPENCV_HAL_IMPL_NEON_LOGIC_OP(v_uint16x8, u16) +OPENCV_HAL_IMPL_NEON_LOGIC_OP(v_int16x8, s16) +OPENCV_HAL_IMPL_NEON_LOGIC_OP(v_uint32x4, u32) +OPENCV_HAL_IMPL_NEON_LOGIC_OP(v_int32x4, s32) +OPENCV_HAL_IMPL_NEON_LOGIC_OP(v_uint64x2, u64) +OPENCV_HAL_IMPL_NEON_LOGIC_OP(v_int64x2, s64) + +#define OPENCV_HAL_IMPL_NEON_FLT_BIT_OP(bin_op, intrin) \ +inline v_float32x4 operator bin_op (const v_float32x4& a, const v_float32x4& b) \ +{ \ + return v_float32x4(vreinterpretq_f32_s32(intrin(vreinterpretq_s32_f32(a.val), vreinterpretq_s32_f32(b.val)))); \ +} \ +inline v_float32x4& operator bin_op##= (v_float32x4& a, const v_float32x4& b) \ +{ \ + a.val = vreinterpretq_f32_s32(intrin(vreinterpretq_s32_f32(a.val), vreinterpretq_s32_f32(b.val))); \ + return a; \ +} + +OPENCV_HAL_IMPL_NEON_FLT_BIT_OP(&, vandq_s32) +OPENCV_HAL_IMPL_NEON_FLT_BIT_OP(|, vorrq_s32) +OPENCV_HAL_IMPL_NEON_FLT_BIT_OP(^, veorq_s32) + +inline v_float32x4 operator ~ (const v_float32x4& a) +{ + return v_float32x4(vreinterpretq_f32_s32(vmvnq_s32(vreinterpretq_s32_f32(a.val)))); +} + +#if CV_SIMD128_64F +inline v_float32x4 v_sqrt(const v_float32x4& x) +{ + return v_float32x4(vsqrtq_f32(x.val)); +} + +inline v_float32x4 v_invsqrt(const v_float32x4& x) +{ + v_float32x4 one = v_setall_f32(1.0f); + return one / v_sqrt(x); +} +#else +inline v_float32x4 v_sqrt(const v_float32x4& x) +{ + float32x4_t x1 = vmaxq_f32(x.val, vdupq_n_f32(FLT_MIN)); + float32x4_t e = vrsqrteq_f32(x1); + e = vmulq_f32(vrsqrtsq_f32(vmulq_f32(x1, e), e), e); + e = vmulq_f32(vrsqrtsq_f32(vmulq_f32(x1, e), e), e); + return v_float32x4(vmulq_f32(x.val, e)); +} + +inline v_float32x4 v_invsqrt(const v_float32x4& x) +{ + float32x4_t e = vrsqrteq_f32(x.val); + e = vmulq_f32(vrsqrtsq_f32(vmulq_f32(x.val, e), e), e); + e = vmulq_f32(vrsqrtsq_f32(vmulq_f32(x.val, e), e), e); + return v_float32x4(e); +} +#endif + +#define OPENCV_HAL_IMPL_NEON_ABS(_Tpuvec, _Tpsvec, usuffix, ssuffix) \ +inline _Tpuvec v_abs(const _Tpsvec& a) { return v_reinterpret_as_##usuffix(_Tpsvec(vabsq_##ssuffix(a.val))); } + +OPENCV_HAL_IMPL_NEON_ABS(v_uint8x16, v_int8x16, u8, s8) +OPENCV_HAL_IMPL_NEON_ABS(v_uint16x8, v_int16x8, u16, s16) +OPENCV_HAL_IMPL_NEON_ABS(v_uint32x4, v_int32x4, u32, s32) + +inline v_float32x4 v_abs(v_float32x4 x) +{ return v_float32x4(vabsq_f32(x.val)); } + +#if CV_SIMD128_64F +#define OPENCV_HAL_IMPL_NEON_DBL_BIT_OP(bin_op, intrin) \ +inline v_float64x2 operator bin_op (const v_float64x2& a, const v_float64x2& b) \ +{ \ + return v_float64x2(vreinterpretq_f64_s64(intrin(vreinterpretq_s64_f64(a.val), vreinterpretq_s64_f64(b.val)))); \ +} \ +inline v_float64x2& operator bin_op##= (v_float64x2& a, const v_float64x2& b) \ +{ \ + a.val = vreinterpretq_f64_s64(intrin(vreinterpretq_s64_f64(a.val), vreinterpretq_s64_f64(b.val))); \ + return a; \ +} + +OPENCV_HAL_IMPL_NEON_DBL_BIT_OP(&, vandq_s64) +OPENCV_HAL_IMPL_NEON_DBL_BIT_OP(|, vorrq_s64) +OPENCV_HAL_IMPL_NEON_DBL_BIT_OP(^, veorq_s64) + +inline v_float64x2 operator ~ (const v_float64x2& a) +{ + return v_float64x2(vreinterpretq_f64_s32(vmvnq_s32(vreinterpretq_s32_f64(a.val)))); +} + +inline v_float64x2 v_sqrt(const v_float64x2& x) +{ + return v_float64x2(vsqrtq_f64(x.val)); +} + +inline v_float64x2 v_invsqrt(const v_float64x2& x) +{ + v_float64x2 one = v_setall_f64(1.0f); + return one / v_sqrt(x); +} + +inline v_float64x2 v_abs(v_float64x2 x) +{ return v_float64x2(vabsq_f64(x.val)); } +#endif + +// TODO: exp, log, sin, cos + +#define OPENCV_HAL_IMPL_NEON_BIN_FUNC(_Tpvec, func, intrin) \ +inline _Tpvec func(const _Tpvec& a, const _Tpvec& b) \ +{ \ + return _Tpvec(intrin(a.val, b.val)); \ +} + +OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_uint8x16, v_min, vminq_u8) +OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_uint8x16, v_max, vmaxq_u8) +OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_int8x16, v_min, vminq_s8) +OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_int8x16, v_max, vmaxq_s8) +OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_uint16x8, v_min, vminq_u16) +OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_uint16x8, v_max, vmaxq_u16) +OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_int16x8, v_min, vminq_s16) +OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_int16x8, v_max, vmaxq_s16) +OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_uint32x4, v_min, vminq_u32) +OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_uint32x4, v_max, vmaxq_u32) +OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_int32x4, v_min, vminq_s32) +OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_int32x4, v_max, vmaxq_s32) +OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_float32x4, v_min, vminq_f32) +OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_float32x4, v_max, vmaxq_f32) +#if CV_SIMD128_64F +OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_float64x2, v_min, vminq_f64) +OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_float64x2, v_max, vmaxq_f64) +#endif + +#if CV_SIMD128_64F +inline int64x2_t vmvnq_s64(int64x2_t a) +{ + int64x2_t vx = vreinterpretq_s64_u32(vdupq_n_u32(0xFFFFFFFF)); + return veorq_s64(a, vx); +} +inline uint64x2_t vmvnq_u64(uint64x2_t a) +{ + uint64x2_t vx = vreinterpretq_u64_u32(vdupq_n_u32(0xFFFFFFFF)); + return veorq_u64(a, vx); +} +#endif +#define OPENCV_HAL_IMPL_NEON_INT_CMP_OP(_Tpvec, cast, suffix, not_suffix) \ +inline _Tpvec operator == (const _Tpvec& a, const _Tpvec& b) \ +{ return _Tpvec(cast(vceqq_##suffix(a.val, b.val))); } \ +inline _Tpvec operator != (const _Tpvec& a, const _Tpvec& b) \ +{ return _Tpvec(cast(vmvnq_##not_suffix(vceqq_##suffix(a.val, b.val)))); } \ +inline _Tpvec operator < (const _Tpvec& a, const _Tpvec& b) \ +{ return _Tpvec(cast(vcltq_##suffix(a.val, b.val))); } \ +inline _Tpvec operator > (const _Tpvec& a, const _Tpvec& b) \ +{ return _Tpvec(cast(vcgtq_##suffix(a.val, b.val))); } \ +inline _Tpvec operator <= (const _Tpvec& a, const _Tpvec& b) \ +{ return _Tpvec(cast(vcleq_##suffix(a.val, b.val))); } \ +inline _Tpvec operator >= (const _Tpvec& a, const _Tpvec& b) \ +{ return _Tpvec(cast(vcgeq_##suffix(a.val, b.val))); } + +OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_uint8x16, OPENCV_HAL_NOP, u8, u8) +OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_int8x16, vreinterpretq_s8_u8, s8, u8) +OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_uint16x8, OPENCV_HAL_NOP, u16, u16) +OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_int16x8, vreinterpretq_s16_u16, s16, u16) +OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_uint32x4, OPENCV_HAL_NOP, u32, u32) +OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_int32x4, vreinterpretq_s32_u32, s32, u32) +OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_float32x4, vreinterpretq_f32_u32, f32, u32) +#if CV_SIMD128_64F +OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_uint64x2, OPENCV_HAL_NOP, u64, u64) +OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_int64x2, vreinterpretq_s64_u64, s64, u64) +OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_float64x2, vreinterpretq_f64_u64, f64, u64) +#endif + +inline v_float32x4 v_not_nan(const v_float32x4& a) +{ return v_float32x4(vreinterpretq_f32_u32(vceqq_f32(a.val, a.val))); } +#if CV_SIMD128_64F +inline v_float64x2 v_not_nan(const v_float64x2& a) +{ return v_float64x2(vreinterpretq_f64_u64(vceqq_f64(a.val, a.val))); } +#endif + +OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_uint8x16, v_add_wrap, vaddq_u8) +OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_int8x16, v_add_wrap, vaddq_s8) +OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_uint16x8, v_add_wrap, vaddq_u16) +OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_int16x8, v_add_wrap, vaddq_s16) +OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_uint8x16, v_sub_wrap, vsubq_u8) +OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_int8x16, v_sub_wrap, vsubq_s8) +OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_uint16x8, v_sub_wrap, vsubq_u16) +OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_int16x8, v_sub_wrap, vsubq_s16) +OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_uint8x16, v_mul_wrap, vmulq_u8) +OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_int8x16, v_mul_wrap, vmulq_s8) +OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_uint16x8, v_mul_wrap, vmulq_u16) +OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_int16x8, v_mul_wrap, vmulq_s16) + +OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_uint8x16, v_absdiff, vabdq_u8) +OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_uint16x8, v_absdiff, vabdq_u16) +OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_uint32x4, v_absdiff, vabdq_u32) +OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_float32x4, v_absdiff, vabdq_f32) +#if CV_SIMD128_64F +OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_float64x2, v_absdiff, vabdq_f64) +#endif + +/** Saturating absolute difference **/ +inline v_int8x16 v_absdiffs(const v_int8x16& a, const v_int8x16& b) +{ return v_int8x16(vqabsq_s8(vqsubq_s8(a.val, b.val))); } +inline v_int16x8 v_absdiffs(const v_int16x8& a, const v_int16x8& b) +{ return v_int16x8(vqabsq_s16(vqsubq_s16(a.val, b.val))); } + +#define OPENCV_HAL_IMPL_NEON_BIN_FUNC2(_Tpvec, _Tpvec2, cast, func, intrin) \ +inline _Tpvec2 func(const _Tpvec& a, const _Tpvec& b) \ +{ \ + return _Tpvec2(cast(intrin(a.val, b.val))); \ +} + +OPENCV_HAL_IMPL_NEON_BIN_FUNC2(v_int8x16, v_uint8x16, vreinterpretq_u8_s8, v_absdiff, vabdq_s8) +OPENCV_HAL_IMPL_NEON_BIN_FUNC2(v_int16x8, v_uint16x8, vreinterpretq_u16_s16, v_absdiff, vabdq_s16) +OPENCV_HAL_IMPL_NEON_BIN_FUNC2(v_int32x4, v_uint32x4, vreinterpretq_u32_s32, v_absdiff, vabdq_s32) + +inline v_float32x4 v_magnitude(const v_float32x4& a, const v_float32x4& b) +{ + v_float32x4 x(vmlaq_f32(vmulq_f32(a.val, a.val), b.val, b.val)); + return v_sqrt(x); +} + +inline v_float32x4 v_sqr_magnitude(const v_float32x4& a, const v_float32x4& b) +{ + return v_float32x4(vmlaq_f32(vmulq_f32(a.val, a.val), b.val, b.val)); +} + +inline v_float32x4 v_fma(const v_float32x4& a, const v_float32x4& b, const v_float32x4& c) +{ +#if CV_SIMD128_64F + // ARMv8, which adds support for 64-bit floating-point (so CV_SIMD128_64F is defined), + // also adds FMA support both for single- and double-precision floating-point vectors + return v_float32x4(vfmaq_f32(c.val, a.val, b.val)); +#else + return v_float32x4(vmlaq_f32(c.val, a.val, b.val)); +#endif +} + +inline v_int32x4 v_fma(const v_int32x4& a, const v_int32x4& b, const v_int32x4& c) +{ + return v_int32x4(vmlaq_s32(c.val, a.val, b.val)); +} + +inline v_float32x4 v_muladd(const v_float32x4& a, const v_float32x4& b, const v_float32x4& c) +{ + return v_fma(a, b, c); +} + +inline v_int32x4 v_muladd(const v_int32x4& a, const v_int32x4& b, const v_int32x4& c) +{ + return v_fma(a, b, c); +} + +#if CV_SIMD128_64F +inline v_float64x2 v_magnitude(const v_float64x2& a, const v_float64x2& b) +{ + v_float64x2 x(vaddq_f64(vmulq_f64(a.val, a.val), vmulq_f64(b.val, b.val))); + return v_sqrt(x); +} + +inline v_float64x2 v_sqr_magnitude(const v_float64x2& a, const v_float64x2& b) +{ + return v_float64x2(vaddq_f64(vmulq_f64(a.val, a.val), vmulq_f64(b.val, b.val))); +} + +inline v_float64x2 v_fma(const v_float64x2& a, const v_float64x2& b, const v_float64x2& c) +{ + return v_float64x2(vfmaq_f64(c.val, a.val, b.val)); +} + +inline v_float64x2 v_muladd(const v_float64x2& a, const v_float64x2& b, const v_float64x2& c) +{ + return v_fma(a, b, c); +} +#endif + +// trade efficiency for convenience +#define OPENCV_HAL_IMPL_NEON_SHIFT_OP(_Tpvec, suffix, _Tps, ssuffix) \ +inline _Tpvec operator << (const _Tpvec& a, int n) \ +{ return _Tpvec(vshlq_##suffix(a.val, vdupq_n_##ssuffix((_Tps)n))); } \ +inline _Tpvec operator >> (const _Tpvec& a, int n) \ +{ return _Tpvec(vshlq_##suffix(a.val, vdupq_n_##ssuffix((_Tps)-n))); } \ +template inline _Tpvec v_shl(const _Tpvec& a) \ +{ return _Tpvec(vshlq_n_##suffix(a.val, n)); } \ +template inline _Tpvec v_shr(const _Tpvec& a) \ +{ return _Tpvec(vshrq_n_##suffix(a.val, n)); } \ +template inline _Tpvec v_rshr(const _Tpvec& a) \ +{ return _Tpvec(vrshrq_n_##suffix(a.val, n)); } + +OPENCV_HAL_IMPL_NEON_SHIFT_OP(v_uint8x16, u8, schar, s8) +OPENCV_HAL_IMPL_NEON_SHIFT_OP(v_int8x16, s8, schar, s8) +OPENCV_HAL_IMPL_NEON_SHIFT_OP(v_uint16x8, u16, short, s16) +OPENCV_HAL_IMPL_NEON_SHIFT_OP(v_int16x8, s16, short, s16) +OPENCV_HAL_IMPL_NEON_SHIFT_OP(v_uint32x4, u32, int, s32) +OPENCV_HAL_IMPL_NEON_SHIFT_OP(v_int32x4, s32, int, s32) +OPENCV_HAL_IMPL_NEON_SHIFT_OP(v_uint64x2, u64, int64, s64) +OPENCV_HAL_IMPL_NEON_SHIFT_OP(v_int64x2, s64, int64, s64) + +#define OPENCV_HAL_IMPL_NEON_ROTATE_OP(_Tpvec, suffix) \ +template inline _Tpvec v_rotate_right(const _Tpvec& a) \ +{ return _Tpvec(vextq_##suffix(a.val, vdupq_n_##suffix(0), n)); } \ +template inline _Tpvec v_rotate_left(const _Tpvec& a) \ +{ return _Tpvec(vextq_##suffix(vdupq_n_##suffix(0), a.val, _Tpvec::nlanes - n)); } \ +template<> inline _Tpvec v_rotate_left<0>(const _Tpvec& a) \ +{ return a; } \ +template inline _Tpvec v_rotate_right(const _Tpvec& a, const _Tpvec& b) \ +{ return _Tpvec(vextq_##suffix(a.val, b.val, n)); } \ +template inline _Tpvec v_rotate_left(const _Tpvec& a, const _Tpvec& b) \ +{ return _Tpvec(vextq_##suffix(b.val, a.val, _Tpvec::nlanes - n)); } \ +template<> inline _Tpvec v_rotate_left<0>(const _Tpvec& a, const _Tpvec& b) \ +{ CV_UNUSED(b); return a; } + +OPENCV_HAL_IMPL_NEON_ROTATE_OP(v_uint8x16, u8) +OPENCV_HAL_IMPL_NEON_ROTATE_OP(v_int8x16, s8) +OPENCV_HAL_IMPL_NEON_ROTATE_OP(v_uint16x8, u16) +OPENCV_HAL_IMPL_NEON_ROTATE_OP(v_int16x8, s16) +OPENCV_HAL_IMPL_NEON_ROTATE_OP(v_uint32x4, u32) +OPENCV_HAL_IMPL_NEON_ROTATE_OP(v_int32x4, s32) +OPENCV_HAL_IMPL_NEON_ROTATE_OP(v_float32x4, f32) +OPENCV_HAL_IMPL_NEON_ROTATE_OP(v_uint64x2, u64) +OPENCV_HAL_IMPL_NEON_ROTATE_OP(v_int64x2, s64) +#if CV_SIMD128_64F +OPENCV_HAL_IMPL_NEON_ROTATE_OP(v_float64x2, f64) +#endif + +#if defined(__clang__) && defined(__aarch64__) +// avoid LD2 instruction. details: https://github.com/opencv/opencv/issues/14863 +#define OPENCV_HAL_IMPL_NEON_LOAD_LOW_OP(_Tpvec, _Tp, suffix) \ +inline _Tpvec v_load_low(const _Tp* ptr) \ +{ \ +typedef uint64 CV_DECL_ALIGNED(1) unaligned_uint64; \ +uint64 v = *(unaligned_uint64*)ptr; \ +return _Tpvec(v_reinterpret_as_##suffix(v_uint64x2(v, (uint64)123456))); \ +} +#else +#define OPENCV_HAL_IMPL_NEON_LOAD_LOW_OP(_Tpvec, _Tp, suffix) \ +inline _Tpvec v_load_low(const _Tp* ptr) \ +{ return _Tpvec(vcombine_##suffix(vld1_##suffix(ptr), vdup_n_##suffix((_Tp)0))); } +#endif + +#define OPENCV_HAL_IMPL_NEON_LOADSTORE_OP(_Tpvec, _Tp, suffix) \ +inline _Tpvec v_load(const _Tp* ptr) \ +{ return _Tpvec(vld1q_##suffix(ptr)); } \ +inline _Tpvec v_load_aligned(const _Tp* ptr) \ +{ return _Tpvec(vld1q_##suffix(ptr)); } \ +OPENCV_HAL_IMPL_NEON_LOAD_LOW_OP(_Tpvec, _Tp, suffix) \ +inline _Tpvec v_load_halves(const _Tp* ptr0, const _Tp* ptr1) \ +{ return _Tpvec(vcombine_##suffix(vld1_##suffix(ptr0), vld1_##suffix(ptr1))); } \ +inline void v_store(_Tp* ptr, const _Tpvec& a) \ +{ vst1q_##suffix(ptr, a.val); } \ +inline void v_store_aligned(_Tp* ptr, const _Tpvec& a) \ +{ vst1q_##suffix(ptr, a.val); } \ +inline void v_store_aligned_nocache(_Tp* ptr, const _Tpvec& a) \ +{ vst1q_##suffix(ptr, a.val); } \ +inline void v_store(_Tp* ptr, const _Tpvec& a, hal::StoreMode /*mode*/) \ +{ vst1q_##suffix(ptr, a.val); } \ +inline void v_store_low(_Tp* ptr, const _Tpvec& a) \ +{ vst1_##suffix(ptr, vget_low_##suffix(a.val)); } \ +inline void v_store_high(_Tp* ptr, const _Tpvec& a) \ +{ vst1_##suffix(ptr, vget_high_##suffix(a.val)); } + +OPENCV_HAL_IMPL_NEON_LOADSTORE_OP(v_uint8x16, uchar, u8) +OPENCV_HAL_IMPL_NEON_LOADSTORE_OP(v_int8x16, schar, s8) +OPENCV_HAL_IMPL_NEON_LOADSTORE_OP(v_uint16x8, ushort, u16) +OPENCV_HAL_IMPL_NEON_LOADSTORE_OP(v_int16x8, short, s16) +OPENCV_HAL_IMPL_NEON_LOADSTORE_OP(v_uint32x4, unsigned, u32) +OPENCV_HAL_IMPL_NEON_LOADSTORE_OP(v_int32x4, int, s32) +OPENCV_HAL_IMPL_NEON_LOADSTORE_OP(v_uint64x2, uint64, u64) +OPENCV_HAL_IMPL_NEON_LOADSTORE_OP(v_int64x2, int64, s64) +OPENCV_HAL_IMPL_NEON_LOADSTORE_OP(v_float32x4, float, f32) +#if CV_SIMD128_64F +OPENCV_HAL_IMPL_NEON_LOADSTORE_OP(v_float64x2, double, f64) +#endif + +inline unsigned v_reduce_sum(const v_uint8x16& a) +{ + uint32x4_t t0 = vpaddlq_u16(vpaddlq_u8(a.val)); + uint32x2_t t1 = vpadd_u32(vget_low_u32(t0), vget_high_u32(t0)); + return vget_lane_u32(vpadd_u32(t1, t1), 0); +} +inline int v_reduce_sum(const v_int8x16& a) +{ + int32x4_t t0 = vpaddlq_s16(vpaddlq_s8(a.val)); + int32x2_t t1 = vpadd_s32(vget_low_s32(t0), vget_high_s32(t0)); + return vget_lane_s32(vpadd_s32(t1, t1), 0); +} +inline unsigned v_reduce_sum(const v_uint16x8& a) +{ + uint32x4_t t0 = vpaddlq_u16(a.val); + uint32x2_t t1 = vpadd_u32(vget_low_u32(t0), vget_high_u32(t0)); + return vget_lane_u32(vpadd_u32(t1, t1), 0); +} +inline int v_reduce_sum(const v_int16x8& a) +{ + int32x4_t t0 = vpaddlq_s16(a.val); + int32x2_t t1 = vpadd_s32(vget_low_s32(t0), vget_high_s32(t0)); + return vget_lane_s32(vpadd_s32(t1, t1), 0); +} + +#define OPENCV_HAL_IMPL_NEON_REDUCE_OP_16(_Tpvec, _Tpnvec, scalartype, func, vectorfunc, suffix) \ +inline scalartype v_reduce_##func(const _Tpvec& a) \ +{ \ + _Tpnvec##_t a0 = vp##vectorfunc##_##suffix(vget_low_##suffix(a.val), vget_high_##suffix(a.val)); \ + a0 = vp##vectorfunc##_##suffix(a0, a0); \ + a0 = vp##vectorfunc##_##suffix(a0, a0); \ + return (scalartype)vget_lane_##suffix(vp##vectorfunc##_##suffix(a0, a0),0); \ +} + +OPENCV_HAL_IMPL_NEON_REDUCE_OP_16(v_uint8x16, uint8x8, uchar, max, max, u8) +OPENCV_HAL_IMPL_NEON_REDUCE_OP_16(v_uint8x16, uint8x8, uchar, min, min, u8) +OPENCV_HAL_IMPL_NEON_REDUCE_OP_16(v_int8x16, int8x8, schar, max, max, s8) +OPENCV_HAL_IMPL_NEON_REDUCE_OP_16(v_int8x16, int8x8, schar, min, min, s8) + +#define OPENCV_HAL_IMPL_NEON_REDUCE_OP_8(_Tpvec, _Tpnvec, scalartype, func, vectorfunc, suffix) \ +inline scalartype v_reduce_##func(const _Tpvec& a) \ +{ \ + _Tpnvec##_t a0 = vp##vectorfunc##_##suffix(vget_low_##suffix(a.val), vget_high_##suffix(a.val)); \ + a0 = vp##vectorfunc##_##suffix(a0, a0); \ + return (scalartype)vget_lane_##suffix(vp##vectorfunc##_##suffix(a0, a0),0); \ +} + +OPENCV_HAL_IMPL_NEON_REDUCE_OP_8(v_uint16x8, uint16x4, ushort, max, max, u16) +OPENCV_HAL_IMPL_NEON_REDUCE_OP_8(v_uint16x8, uint16x4, ushort, min, min, u16) +OPENCV_HAL_IMPL_NEON_REDUCE_OP_8(v_int16x8, int16x4, short, max, max, s16) +OPENCV_HAL_IMPL_NEON_REDUCE_OP_8(v_int16x8, int16x4, short, min, min, s16) + +#define OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(_Tpvec, _Tpnvec, scalartype, func, vectorfunc, suffix) \ +inline scalartype v_reduce_##func(const _Tpvec& a) \ +{ \ + _Tpnvec##_t a0 = vp##vectorfunc##_##suffix(vget_low_##suffix(a.val), vget_high_##suffix(a.val)); \ + return (scalartype)vget_lane_##suffix(vp##vectorfunc##_##suffix(a0, vget_high_##suffix(a.val)),0); \ +} + +OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(v_uint32x4, uint32x2, unsigned, sum, add, u32) +OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(v_uint32x4, uint32x2, unsigned, max, max, u32) +OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(v_uint32x4, uint32x2, unsigned, min, min, u32) +OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(v_int32x4, int32x2, int, sum, add, s32) +OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(v_int32x4, int32x2, int, max, max, s32) +OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(v_int32x4, int32x2, int, min, min, s32) +OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(v_float32x4, float32x2, float, sum, add, f32) +OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(v_float32x4, float32x2, float, max, max, f32) +OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(v_float32x4, float32x2, float, min, min, f32) + +inline uint64 v_reduce_sum(const v_uint64x2& a) +{ return vget_lane_u64(vadd_u64(vget_low_u64(a.val), vget_high_u64(a.val)),0); } +inline int64 v_reduce_sum(const v_int64x2& a) +{ return vget_lane_s64(vadd_s64(vget_low_s64(a.val), vget_high_s64(a.val)),0); } +#if CV_SIMD128_64F +inline double v_reduce_sum(const v_float64x2& a) +{ + return vgetq_lane_f64(a.val, 0) + vgetq_lane_f64(a.val, 1); +} +#endif + +inline v_float32x4 v_reduce_sum4(const v_float32x4& a, const v_float32x4& b, + const v_float32x4& c, const v_float32x4& d) +{ + float32x4x2_t ab = vtrnq_f32(a.val, b.val); + float32x4x2_t cd = vtrnq_f32(c.val, d.val); + + float32x4_t u0 = vaddq_f32(ab.val[0], ab.val[1]); // a0+a1 b0+b1 a2+a3 b2+b3 + float32x4_t u1 = vaddq_f32(cd.val[0], cd.val[1]); // c0+c1 d0+d1 c2+c3 d2+d3 + + float32x4_t v0 = vcombine_f32(vget_low_f32(u0), vget_low_f32(u1)); + float32x4_t v1 = vcombine_f32(vget_high_f32(u0), vget_high_f32(u1)); + + return v_float32x4(vaddq_f32(v0, v1)); +} + +inline unsigned v_reduce_sad(const v_uint8x16& a, const v_uint8x16& b) +{ + uint32x4_t t0 = vpaddlq_u16(vpaddlq_u8(vabdq_u8(a.val, b.val))); + uint32x2_t t1 = vpadd_u32(vget_low_u32(t0), vget_high_u32(t0)); + return vget_lane_u32(vpadd_u32(t1, t1), 0); +} +inline unsigned v_reduce_sad(const v_int8x16& a, const v_int8x16& b) +{ + uint32x4_t t0 = vpaddlq_u16(vpaddlq_u8(vreinterpretq_u8_s8(vabdq_s8(a.val, b.val)))); + uint32x2_t t1 = vpadd_u32(vget_low_u32(t0), vget_high_u32(t0)); + return vget_lane_u32(vpadd_u32(t1, t1), 0); +} +inline unsigned v_reduce_sad(const v_uint16x8& a, const v_uint16x8& b) +{ + uint32x4_t t0 = vpaddlq_u16(vabdq_u16(a.val, b.val)); + uint32x2_t t1 = vpadd_u32(vget_low_u32(t0), vget_high_u32(t0)); + return vget_lane_u32(vpadd_u32(t1, t1), 0); +} +inline unsigned v_reduce_sad(const v_int16x8& a, const v_int16x8& b) +{ + uint32x4_t t0 = vpaddlq_u16(vreinterpretq_u16_s16(vabdq_s16(a.val, b.val))); + uint32x2_t t1 = vpadd_u32(vget_low_u32(t0), vget_high_u32(t0)); + return vget_lane_u32(vpadd_u32(t1, t1), 0); +} +inline unsigned v_reduce_sad(const v_uint32x4& a, const v_uint32x4& b) +{ + uint32x4_t t0 = vabdq_u32(a.val, b.val); + uint32x2_t t1 = vpadd_u32(vget_low_u32(t0), vget_high_u32(t0)); + return vget_lane_u32(vpadd_u32(t1, t1), 0); +} +inline unsigned v_reduce_sad(const v_int32x4& a, const v_int32x4& b) +{ + uint32x4_t t0 = vreinterpretq_u32_s32(vabdq_s32(a.val, b.val)); + uint32x2_t t1 = vpadd_u32(vget_low_u32(t0), vget_high_u32(t0)); + return vget_lane_u32(vpadd_u32(t1, t1), 0); +} +inline float v_reduce_sad(const v_float32x4& a, const v_float32x4& b) +{ + float32x4_t t0 = vabdq_f32(a.val, b.val); + float32x2_t t1 = vpadd_f32(vget_low_f32(t0), vget_high_f32(t0)); + return vget_lane_f32(vpadd_f32(t1, t1), 0); +} + +inline v_uint8x16 v_popcount(const v_uint8x16& a) +{ return v_uint8x16(vcntq_u8(a.val)); } +inline v_uint8x16 v_popcount(const v_int8x16& a) +{ return v_uint8x16(vcntq_u8(vreinterpretq_u8_s8(a.val))); } +inline v_uint16x8 v_popcount(const v_uint16x8& a) +{ return v_uint16x8(vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u16(a.val)))); } +inline v_uint16x8 v_popcount(const v_int16x8& a) +{ return v_uint16x8(vpaddlq_u8(vcntq_u8(vreinterpretq_u8_s16(a.val)))); } +inline v_uint32x4 v_popcount(const v_uint32x4& a) +{ return v_uint32x4(vpaddlq_u16(vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u32(a.val))))); } +inline v_uint32x4 v_popcount(const v_int32x4& a) +{ return v_uint32x4(vpaddlq_u16(vpaddlq_u8(vcntq_u8(vreinterpretq_u8_s32(a.val))))); } +inline v_uint64x2 v_popcount(const v_uint64x2& a) +{ return v_uint64x2(vpaddlq_u32(vpaddlq_u16(vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u64(a.val)))))); } +inline v_uint64x2 v_popcount(const v_int64x2& a) +{ return v_uint64x2(vpaddlq_u32(vpaddlq_u16(vpaddlq_u8(vcntq_u8(vreinterpretq_u8_s64(a.val)))))); } + +inline int v_signmask(const v_uint8x16& a) +{ + int8x8_t m0 = vcreate_s8(CV_BIG_UINT(0x0706050403020100)); + uint8x16_t v0 = vshlq_u8(vshrq_n_u8(a.val, 7), vcombine_s8(m0, m0)); + uint64x2_t v1 = vpaddlq_u32(vpaddlq_u16(vpaddlq_u8(v0))); + return (int)vgetq_lane_u64(v1, 0) + ((int)vgetq_lane_u64(v1, 1) << 8); +} +inline int v_signmask(const v_int8x16& a) +{ return v_signmask(v_reinterpret_as_u8(a)); } + +inline int v_signmask(const v_uint16x8& a) +{ + int16x4_t m0 = vcreate_s16(CV_BIG_UINT(0x0003000200010000)); + uint16x8_t v0 = vshlq_u16(vshrq_n_u16(a.val, 15), vcombine_s16(m0, m0)); + uint64x2_t v1 = vpaddlq_u32(vpaddlq_u16(v0)); + return (int)vgetq_lane_u64(v1, 0) + ((int)vgetq_lane_u64(v1, 1) << 4); +} +inline int v_signmask(const v_int16x8& a) +{ return v_signmask(v_reinterpret_as_u16(a)); } + +inline int v_signmask(const v_uint32x4& a) +{ + int32x2_t m0 = vcreate_s32(CV_BIG_UINT(0x0000000100000000)); + uint32x4_t v0 = vshlq_u32(vshrq_n_u32(a.val, 31), vcombine_s32(m0, m0)); + uint64x2_t v1 = vpaddlq_u32(v0); + return (int)vgetq_lane_u64(v1, 0) + ((int)vgetq_lane_u64(v1, 1) << 2); +} +inline int v_signmask(const v_int32x4& a) +{ return v_signmask(v_reinterpret_as_u32(a)); } +inline int v_signmask(const v_float32x4& a) +{ return v_signmask(v_reinterpret_as_u32(a)); } +inline int v_signmask(const v_uint64x2& a) +{ + int64x1_t m0 = vdup_n_s64(0); + uint64x2_t v0 = vshlq_u64(vshrq_n_u64(a.val, 63), vcombine_s64(m0, m0)); + return (int)vgetq_lane_u64(v0, 0) + ((int)vgetq_lane_u64(v0, 1) << 1); +} +inline int v_signmask(const v_int64x2& a) +{ return v_signmask(v_reinterpret_as_u64(a)); } +#if CV_SIMD128_64F +inline int v_signmask(const v_float64x2& a) +{ return v_signmask(v_reinterpret_as_u64(a)); } +#endif + +inline int v_scan_forward(const v_int8x16& a) { return trailingZeros32(v_signmask(a)); } +inline int v_scan_forward(const v_uint8x16& a) { return trailingZeros32(v_signmask(a)); } +inline int v_scan_forward(const v_int16x8& a) { return trailingZeros32(v_signmask(a)); } +inline int v_scan_forward(const v_uint16x8& a) { return trailingZeros32(v_signmask(a)); } +inline int v_scan_forward(const v_int32x4& a) { return trailingZeros32(v_signmask(a)); } +inline int v_scan_forward(const v_uint32x4& a) { return trailingZeros32(v_signmask(a)); } +inline int v_scan_forward(const v_float32x4& a) { return trailingZeros32(v_signmask(a)); } +inline int v_scan_forward(const v_int64x2& a) { return trailingZeros32(v_signmask(a)); } +inline int v_scan_forward(const v_uint64x2& a) { return trailingZeros32(v_signmask(a)); } +#if CV_SIMD128_64F +inline int v_scan_forward(const v_float64x2& a) { return trailingZeros32(v_signmask(a)); } +#endif + +#define OPENCV_HAL_IMPL_NEON_CHECK_ALLANY(_Tpvec, suffix, shift) \ +inline bool v_check_all(const v_##_Tpvec& a) \ +{ \ + _Tpvec##_t v0 = vshrq_n_##suffix(vmvnq_##suffix(a.val), shift); \ + uint64x2_t v1 = vreinterpretq_u64_##suffix(v0); \ + return (vgetq_lane_u64(v1, 0) | vgetq_lane_u64(v1, 1)) == 0; \ +} \ +inline bool v_check_any(const v_##_Tpvec& a) \ +{ \ + _Tpvec##_t v0 = vshrq_n_##suffix(a.val, shift); \ + uint64x2_t v1 = vreinterpretq_u64_##suffix(v0); \ + return (vgetq_lane_u64(v1, 0) | vgetq_lane_u64(v1, 1)) != 0; \ +} + +OPENCV_HAL_IMPL_NEON_CHECK_ALLANY(uint8x16, u8, 7) +OPENCV_HAL_IMPL_NEON_CHECK_ALLANY(uint16x8, u16, 15) +OPENCV_HAL_IMPL_NEON_CHECK_ALLANY(uint32x4, u32, 31) + +inline bool v_check_all(const v_uint64x2& a) +{ + uint64x2_t v0 = vshrq_n_u64(a.val, 63); + return (vgetq_lane_u64(v0, 0) & vgetq_lane_u64(v0, 1)) == 1; +} +inline bool v_check_any(const v_uint64x2& a) +{ + uint64x2_t v0 = vshrq_n_u64(a.val, 63); + return (vgetq_lane_u64(v0, 0) | vgetq_lane_u64(v0, 1)) != 0; +} + +inline bool v_check_all(const v_int8x16& a) +{ return v_check_all(v_reinterpret_as_u8(a)); } +inline bool v_check_all(const v_int16x8& a) +{ return v_check_all(v_reinterpret_as_u16(a)); } +inline bool v_check_all(const v_int32x4& a) +{ return v_check_all(v_reinterpret_as_u32(a)); } +inline bool v_check_all(const v_float32x4& a) +{ return v_check_all(v_reinterpret_as_u32(a)); } + +inline bool v_check_any(const v_int8x16& a) +{ return v_check_any(v_reinterpret_as_u8(a)); } +inline bool v_check_any(const v_int16x8& a) +{ return v_check_any(v_reinterpret_as_u16(a)); } +inline bool v_check_any(const v_int32x4& a) +{ return v_check_any(v_reinterpret_as_u32(a)); } +inline bool v_check_any(const v_float32x4& a) +{ return v_check_any(v_reinterpret_as_u32(a)); } + +inline bool v_check_all(const v_int64x2& a) +{ return v_check_all(v_reinterpret_as_u64(a)); } +inline bool v_check_any(const v_int64x2& a) +{ return v_check_any(v_reinterpret_as_u64(a)); } +#if CV_SIMD128_64F +inline bool v_check_all(const v_float64x2& a) +{ return v_check_all(v_reinterpret_as_u64(a)); } +inline bool v_check_any(const v_float64x2& a) +{ return v_check_any(v_reinterpret_as_u64(a)); } +#endif + +#define OPENCV_HAL_IMPL_NEON_SELECT(_Tpvec, suffix, usuffix) \ +inline _Tpvec v_select(const _Tpvec& mask, const _Tpvec& a, const _Tpvec& b) \ +{ \ + return _Tpvec(vbslq_##suffix(vreinterpretq_##usuffix##_##suffix(mask.val), a.val, b.val)); \ +} + +OPENCV_HAL_IMPL_NEON_SELECT(v_uint8x16, u8, u8) +OPENCV_HAL_IMPL_NEON_SELECT(v_int8x16, s8, u8) +OPENCV_HAL_IMPL_NEON_SELECT(v_uint16x8, u16, u16) +OPENCV_HAL_IMPL_NEON_SELECT(v_int16x8, s16, u16) +OPENCV_HAL_IMPL_NEON_SELECT(v_uint32x4, u32, u32) +OPENCV_HAL_IMPL_NEON_SELECT(v_int32x4, s32, u32) +OPENCV_HAL_IMPL_NEON_SELECT(v_float32x4, f32, u32) +#if CV_SIMD128_64F +OPENCV_HAL_IMPL_NEON_SELECT(v_float64x2, f64, u64) +#endif + +#define OPENCV_HAL_IMPL_NEON_EXPAND(_Tpvec, _Tpwvec, _Tp, suffix) \ +inline void v_expand(const _Tpvec& a, _Tpwvec& b0, _Tpwvec& b1) \ +{ \ + b0.val = vmovl_##suffix(vget_low_##suffix(a.val)); \ + b1.val = vmovl_##suffix(vget_high_##suffix(a.val)); \ +} \ +inline _Tpwvec v_expand_low(const _Tpvec& a) \ +{ \ + return _Tpwvec(vmovl_##suffix(vget_low_##suffix(a.val))); \ +} \ +inline _Tpwvec v_expand_high(const _Tpvec& a) \ +{ \ + return _Tpwvec(vmovl_##suffix(vget_high_##suffix(a.val))); \ +} \ +inline _Tpwvec v_load_expand(const _Tp* ptr) \ +{ \ + return _Tpwvec(vmovl_##suffix(vld1_##suffix(ptr))); \ +} + +OPENCV_HAL_IMPL_NEON_EXPAND(v_uint8x16, v_uint16x8, uchar, u8) +OPENCV_HAL_IMPL_NEON_EXPAND(v_int8x16, v_int16x8, schar, s8) +OPENCV_HAL_IMPL_NEON_EXPAND(v_uint16x8, v_uint32x4, ushort, u16) +OPENCV_HAL_IMPL_NEON_EXPAND(v_int16x8, v_int32x4, short, s16) +OPENCV_HAL_IMPL_NEON_EXPAND(v_uint32x4, v_uint64x2, uint, u32) +OPENCV_HAL_IMPL_NEON_EXPAND(v_int32x4, v_int64x2, int, s32) + +inline v_uint32x4 v_load_expand_q(const uchar* ptr) +{ + typedef unsigned int CV_DECL_ALIGNED(1) unaligned_uint; + uint8x8_t v0 = vcreate_u8(*(unaligned_uint*)ptr); + uint16x4_t v1 = vget_low_u16(vmovl_u8(v0)); + return v_uint32x4(vmovl_u16(v1)); +} + +inline v_int32x4 v_load_expand_q(const schar* ptr) +{ + typedef unsigned int CV_DECL_ALIGNED(1) unaligned_uint; + int8x8_t v0 = vcreate_s8(*(unaligned_uint*)ptr); + int16x4_t v1 = vget_low_s16(vmovl_s8(v0)); + return v_int32x4(vmovl_s16(v1)); +} + +#if defined(__aarch64__) || defined(_M_ARM64) +#define OPENCV_HAL_IMPL_NEON_UNPACKS(_Tpvec, suffix) \ +inline void v_zip(const v_##_Tpvec& a0, const v_##_Tpvec& a1, v_##_Tpvec& b0, v_##_Tpvec& b1) \ +{ \ + b0.val = vzip1q_##suffix(a0.val, a1.val); \ + b1.val = vzip2q_##suffix(a0.val, a1.val); \ +} \ +inline v_##_Tpvec v_combine_low(const v_##_Tpvec& a, const v_##_Tpvec& b) \ +{ \ + return v_##_Tpvec(vcombine_##suffix(vget_low_##suffix(a.val), vget_low_##suffix(b.val))); \ +} \ +inline v_##_Tpvec v_combine_high(const v_##_Tpvec& a, const v_##_Tpvec& b) \ +{ \ + return v_##_Tpvec(vcombine_##suffix(vget_high_##suffix(a.val), vget_high_##suffix(b.val))); \ +} \ +inline void v_recombine(const v_##_Tpvec& a, const v_##_Tpvec& b, v_##_Tpvec& c, v_##_Tpvec& d) \ +{ \ + c.val = vcombine_##suffix(vget_low_##suffix(a.val), vget_low_##suffix(b.val)); \ + d.val = vcombine_##suffix(vget_high_##suffix(a.val), vget_high_##suffix(b.val)); \ +} +#else +#define OPENCV_HAL_IMPL_NEON_UNPACKS(_Tpvec, suffix) \ +inline void v_zip(const v_##_Tpvec& a0, const v_##_Tpvec& a1, v_##_Tpvec& b0, v_##_Tpvec& b1) \ +{ \ + _Tpvec##x2_t p = vzipq_##suffix(a0.val, a1.val); \ + b0.val = p.val[0]; \ + b1.val = p.val[1]; \ +} \ +inline v_##_Tpvec v_combine_low(const v_##_Tpvec& a, const v_##_Tpvec& b) \ +{ \ + return v_##_Tpvec(vcombine_##suffix(vget_low_##suffix(a.val), vget_low_##suffix(b.val))); \ +} \ +inline v_##_Tpvec v_combine_high(const v_##_Tpvec& a, const v_##_Tpvec& b) \ +{ \ + return v_##_Tpvec(vcombine_##suffix(vget_high_##suffix(a.val), vget_high_##suffix(b.val))); \ +} \ +inline void v_recombine(const v_##_Tpvec& a, const v_##_Tpvec& b, v_##_Tpvec& c, v_##_Tpvec& d) \ +{ \ + c.val = vcombine_##suffix(vget_low_##suffix(a.val), vget_low_##suffix(b.val)); \ + d.val = vcombine_##suffix(vget_high_##suffix(a.val), vget_high_##suffix(b.val)); \ +} +#endif + +OPENCV_HAL_IMPL_NEON_UNPACKS(uint8x16, u8) +OPENCV_HAL_IMPL_NEON_UNPACKS(int8x16, s8) +OPENCV_HAL_IMPL_NEON_UNPACKS(uint16x8, u16) +OPENCV_HAL_IMPL_NEON_UNPACKS(int16x8, s16) +OPENCV_HAL_IMPL_NEON_UNPACKS(uint32x4, u32) +OPENCV_HAL_IMPL_NEON_UNPACKS(int32x4, s32) +OPENCV_HAL_IMPL_NEON_UNPACKS(float32x4, f32) +#if CV_SIMD128_64F +OPENCV_HAL_IMPL_NEON_UNPACKS(float64x2, f64) +#endif + +inline v_uint8x16 v_reverse(const v_uint8x16 &a) +{ + uint8x16_t vec = vrev64q_u8(a.val); + return v_uint8x16(vextq_u8(vec, vec, 8)); +} + +inline v_int8x16 v_reverse(const v_int8x16 &a) +{ return v_reinterpret_as_s8(v_reverse(v_reinterpret_as_u8(a))); } + +inline v_uint16x8 v_reverse(const v_uint16x8 &a) +{ + uint16x8_t vec = vrev64q_u16(a.val); + return v_uint16x8(vextq_u16(vec, vec, 4)); +} + +inline v_int16x8 v_reverse(const v_int16x8 &a) +{ return v_reinterpret_as_s16(v_reverse(v_reinterpret_as_u16(a))); } + +inline v_uint32x4 v_reverse(const v_uint32x4 &a) +{ + uint32x4_t vec = vrev64q_u32(a.val); + return v_uint32x4(vextq_u32(vec, vec, 2)); +} + +inline v_int32x4 v_reverse(const v_int32x4 &a) +{ return v_reinterpret_as_s32(v_reverse(v_reinterpret_as_u32(a))); } + +inline v_float32x4 v_reverse(const v_float32x4 &a) +{ return v_reinterpret_as_f32(v_reverse(v_reinterpret_as_u32(a))); } + +inline v_uint64x2 v_reverse(const v_uint64x2 &a) +{ + uint64x2_t vec = a.val; + uint64x1_t vec_lo = vget_low_u64(vec); + uint64x1_t vec_hi = vget_high_u64(vec); + return v_uint64x2(vcombine_u64(vec_hi, vec_lo)); +} + +inline v_int64x2 v_reverse(const v_int64x2 &a) +{ return v_reinterpret_as_s64(v_reverse(v_reinterpret_as_u64(a))); } + +#if CV_SIMD128_64F +inline v_float64x2 v_reverse(const v_float64x2 &a) +{ return v_reinterpret_as_f64(v_reverse(v_reinterpret_as_u64(a))); } +#endif + +#define OPENCV_HAL_IMPL_NEON_EXTRACT(_Tpvec, suffix) \ +template \ +inline v_##_Tpvec v_extract(const v_##_Tpvec& a, const v_##_Tpvec& b) \ +{ \ + return v_##_Tpvec(vextq_##suffix(a.val, b.val, s)); \ +} + +OPENCV_HAL_IMPL_NEON_EXTRACT(uint8x16, u8) +OPENCV_HAL_IMPL_NEON_EXTRACT(int8x16, s8) +OPENCV_HAL_IMPL_NEON_EXTRACT(uint16x8, u16) +OPENCV_HAL_IMPL_NEON_EXTRACT(int16x8, s16) +OPENCV_HAL_IMPL_NEON_EXTRACT(uint32x4, u32) +OPENCV_HAL_IMPL_NEON_EXTRACT(int32x4, s32) +OPENCV_HAL_IMPL_NEON_EXTRACT(uint64x2, u64) +OPENCV_HAL_IMPL_NEON_EXTRACT(int64x2, s64) +OPENCV_HAL_IMPL_NEON_EXTRACT(float32x4, f32) +#if CV_SIMD128_64F +OPENCV_HAL_IMPL_NEON_EXTRACT(float64x2, f64) +#endif + +#define OPENCV_HAL_IMPL_NEON_EXTRACT_N(_Tpvec, _Tp, suffix) \ +template inline _Tp v_extract_n(_Tpvec v) { return vgetq_lane_##suffix(v.val, i); } + +OPENCV_HAL_IMPL_NEON_EXTRACT_N(v_uint8x16, uchar, u8) +OPENCV_HAL_IMPL_NEON_EXTRACT_N(v_int8x16, schar, s8) +OPENCV_HAL_IMPL_NEON_EXTRACT_N(v_uint16x8, ushort, u16) +OPENCV_HAL_IMPL_NEON_EXTRACT_N(v_int16x8, short, s16) +OPENCV_HAL_IMPL_NEON_EXTRACT_N(v_uint32x4, uint, u32) +OPENCV_HAL_IMPL_NEON_EXTRACT_N(v_int32x4, int, s32) +OPENCV_HAL_IMPL_NEON_EXTRACT_N(v_uint64x2, uint64, u64) +OPENCV_HAL_IMPL_NEON_EXTRACT_N(v_int64x2, int64, s64) +OPENCV_HAL_IMPL_NEON_EXTRACT_N(v_float32x4, float, f32) +#if CV_SIMD128_64F +OPENCV_HAL_IMPL_NEON_EXTRACT_N(v_float64x2, double, f64) +#endif + +#define OPENCV_HAL_IMPL_NEON_BROADCAST(_Tpvec, _Tp, suffix) \ +template inline _Tpvec v_broadcast_element(_Tpvec v) { _Tp t = v_extract_n(v); return v_setall_##suffix(t); } + +OPENCV_HAL_IMPL_NEON_BROADCAST(v_uint8x16, uchar, u8) +OPENCV_HAL_IMPL_NEON_BROADCAST(v_int8x16, schar, s8) +OPENCV_HAL_IMPL_NEON_BROADCAST(v_uint16x8, ushort, u16) +OPENCV_HAL_IMPL_NEON_BROADCAST(v_int16x8, short, s16) +OPENCV_HAL_IMPL_NEON_BROADCAST(v_uint32x4, uint, u32) +OPENCV_HAL_IMPL_NEON_BROADCAST(v_int32x4, int, s32) +OPENCV_HAL_IMPL_NEON_BROADCAST(v_uint64x2, uint64, u64) +OPENCV_HAL_IMPL_NEON_BROADCAST(v_int64x2, int64, s64) +OPENCV_HAL_IMPL_NEON_BROADCAST(v_float32x4, float, f32) +#if CV_SIMD128_64F +OPENCV_HAL_IMPL_NEON_BROADCAST(v_float64x2, double, f64) +#endif + +#if CV_SIMD128_64F +inline v_int32x4 v_round(const v_float32x4& a) +{ + float32x4_t a_ = a.val; + int32x4_t result; + __asm__ ("fcvtns %0.4s, %1.4s" + : "=w"(result) + : "w"(a_) + : /* No clobbers */); + return v_int32x4(result); +} +#else +inline v_int32x4 v_round(const v_float32x4& a) +{ + static const int32x4_t v_sign = vdupq_n_s32(1 << 31), + v_05 = vreinterpretq_s32_f32(vdupq_n_f32(0.5f)); + + int32x4_t v_addition = vorrq_s32(v_05, vandq_s32(v_sign, vreinterpretq_s32_f32(a.val))); + return v_int32x4(vcvtq_s32_f32(vaddq_f32(a.val, vreinterpretq_f32_s32(v_addition)))); +} +#endif +inline v_int32x4 v_floor(const v_float32x4& a) +{ + int32x4_t a1 = vcvtq_s32_f32(a.val); + uint32x4_t mask = vcgtq_f32(vcvtq_f32_s32(a1), a.val); + return v_int32x4(vaddq_s32(a1, vreinterpretq_s32_u32(mask))); +} + +inline v_int32x4 v_ceil(const v_float32x4& a) +{ + int32x4_t a1 = vcvtq_s32_f32(a.val); + uint32x4_t mask = vcgtq_f32(a.val, vcvtq_f32_s32(a1)); + return v_int32x4(vsubq_s32(a1, vreinterpretq_s32_u32(mask))); +} + +inline v_int32x4 v_trunc(const v_float32x4& a) +{ return v_int32x4(vcvtq_s32_f32(a.val)); } + +#if CV_SIMD128_64F +inline v_int32x4 v_round(const v_float64x2& a) +{ + static const int32x2_t zero = vdup_n_s32(0); + return v_int32x4(vcombine_s32(vmovn_s64(vcvtaq_s64_f64(a.val)), zero)); +} + +inline v_int32x4 v_round(const v_float64x2& a, const v_float64x2& b) +{ + return v_int32x4(vcombine_s32(vmovn_s64(vcvtaq_s64_f64(a.val)), vmovn_s64(vcvtaq_s64_f64(b.val)))); +} + +inline v_int32x4 v_floor(const v_float64x2& a) +{ + static const int32x2_t zero = vdup_n_s32(0); + int64x2_t a1 = vcvtq_s64_f64(a.val); + uint64x2_t mask = vcgtq_f64(vcvtq_f64_s64(a1), a.val); + a1 = vaddq_s64(a1, vreinterpretq_s64_u64(mask)); + return v_int32x4(vcombine_s32(vmovn_s64(a1), zero)); +} + +inline v_int32x4 v_ceil(const v_float64x2& a) +{ + static const int32x2_t zero = vdup_n_s32(0); + int64x2_t a1 = vcvtq_s64_f64(a.val); + uint64x2_t mask = vcgtq_f64(a.val, vcvtq_f64_s64(a1)); + a1 = vsubq_s64(a1, vreinterpretq_s64_u64(mask)); + return v_int32x4(vcombine_s32(vmovn_s64(a1), zero)); +} + +inline v_int32x4 v_trunc(const v_float64x2& a) +{ + static const int32x2_t zero = vdup_n_s32(0); + return v_int32x4(vcombine_s32(vmovn_s64(vcvtaq_s64_f64(a.val)), zero)); +} +#endif + +#define OPENCV_HAL_IMPL_NEON_TRANSPOSE4x4(_Tpvec, suffix) \ +inline void v_transpose4x4(const v_##_Tpvec& a0, const v_##_Tpvec& a1, \ + const v_##_Tpvec& a2, const v_##_Tpvec& a3, \ + v_##_Tpvec& b0, v_##_Tpvec& b1, \ + v_##_Tpvec& b2, v_##_Tpvec& b3) \ +{ \ + /* m00 m01 m02 m03 */ \ + /* m10 m11 m12 m13 */ \ + /* m20 m21 m22 m23 */ \ + /* m30 m31 m32 m33 */ \ + _Tpvec##x2_t t0 = vtrnq_##suffix(a0.val, a1.val); \ + _Tpvec##x2_t t1 = vtrnq_##suffix(a2.val, a3.val); \ + /* m00 m10 m02 m12 */ \ + /* m01 m11 m03 m13 */ \ + /* m20 m30 m22 m32 */ \ + /* m21 m31 m23 m33 */ \ + b0.val = vcombine_##suffix(vget_low_##suffix(t0.val[0]), vget_low_##suffix(t1.val[0])); \ + b1.val = vcombine_##suffix(vget_low_##suffix(t0.val[1]), vget_low_##suffix(t1.val[1])); \ + b2.val = vcombine_##suffix(vget_high_##suffix(t0.val[0]), vget_high_##suffix(t1.val[0])); \ + b3.val = vcombine_##suffix(vget_high_##suffix(t0.val[1]), vget_high_##suffix(t1.val[1])); \ +} + +OPENCV_HAL_IMPL_NEON_TRANSPOSE4x4(uint32x4, u32) +OPENCV_HAL_IMPL_NEON_TRANSPOSE4x4(int32x4, s32) +OPENCV_HAL_IMPL_NEON_TRANSPOSE4x4(float32x4, f32) + +#define OPENCV_HAL_IMPL_NEON_INTERLEAVED(_Tpvec, _Tp, suffix) \ +inline void v_load_deinterleave(const _Tp* ptr, v_##_Tpvec& a, v_##_Tpvec& b) \ +{ \ + _Tpvec##x2_t v = vld2q_##suffix(ptr); \ + a.val = v.val[0]; \ + b.val = v.val[1]; \ +} \ +inline void v_load_deinterleave(const _Tp* ptr, v_##_Tpvec& a, v_##_Tpvec& b, v_##_Tpvec& c) \ +{ \ + _Tpvec##x3_t v = vld3q_##suffix(ptr); \ + a.val = v.val[0]; \ + b.val = v.val[1]; \ + c.val = v.val[2]; \ +} \ +inline void v_load_deinterleave(const _Tp* ptr, v_##_Tpvec& a, v_##_Tpvec& b, \ + v_##_Tpvec& c, v_##_Tpvec& d) \ +{ \ + _Tpvec##x4_t v = vld4q_##suffix(ptr); \ + a.val = v.val[0]; \ + b.val = v.val[1]; \ + c.val = v.val[2]; \ + d.val = v.val[3]; \ +} \ +inline void v_store_interleave( _Tp* ptr, const v_##_Tpvec& a, const v_##_Tpvec& b, \ + hal::StoreMode /*mode*/=hal::STORE_UNALIGNED) \ +{ \ + _Tpvec##x2_t v; \ + v.val[0] = a.val; \ + v.val[1] = b.val; \ + vst2q_##suffix(ptr, v); \ +} \ +inline void v_store_interleave( _Tp* ptr, const v_##_Tpvec& a, const v_##_Tpvec& b, \ + const v_##_Tpvec& c, hal::StoreMode /*mode*/=hal::STORE_UNALIGNED) \ +{ \ + _Tpvec##x3_t v; \ + v.val[0] = a.val; \ + v.val[1] = b.val; \ + v.val[2] = c.val; \ + vst3q_##suffix(ptr, v); \ +} \ +inline void v_store_interleave( _Tp* ptr, const v_##_Tpvec& a, const v_##_Tpvec& b, \ + const v_##_Tpvec& c, const v_##_Tpvec& d, \ + hal::StoreMode /*mode*/=hal::STORE_UNALIGNED ) \ +{ \ + _Tpvec##x4_t v; \ + v.val[0] = a.val; \ + v.val[1] = b.val; \ + v.val[2] = c.val; \ + v.val[3] = d.val; \ + vst4q_##suffix(ptr, v); \ +} + +#define OPENCV_HAL_IMPL_NEON_INTERLEAVED_INT64(tp, suffix) \ +inline void v_load_deinterleave( const tp* ptr, v_##tp##x2& a, v_##tp##x2& b ) \ +{ \ + tp##x1_t a0 = vld1_##suffix(ptr); \ + tp##x1_t b0 = vld1_##suffix(ptr + 1); \ + tp##x1_t a1 = vld1_##suffix(ptr + 2); \ + tp##x1_t b1 = vld1_##suffix(ptr + 3); \ + a = v_##tp##x2(vcombine_##suffix(a0, a1)); \ + b = v_##tp##x2(vcombine_##suffix(b0, b1)); \ +} \ + \ +inline void v_load_deinterleave( const tp* ptr, v_##tp##x2& a, \ + v_##tp##x2& b, v_##tp##x2& c ) \ +{ \ + tp##x1_t a0 = vld1_##suffix(ptr); \ + tp##x1_t b0 = vld1_##suffix(ptr + 1); \ + tp##x1_t c0 = vld1_##suffix(ptr + 2); \ + tp##x1_t a1 = vld1_##suffix(ptr + 3); \ + tp##x1_t b1 = vld1_##suffix(ptr + 4); \ + tp##x1_t c1 = vld1_##suffix(ptr + 5); \ + a = v_##tp##x2(vcombine_##suffix(a0, a1)); \ + b = v_##tp##x2(vcombine_##suffix(b0, b1)); \ + c = v_##tp##x2(vcombine_##suffix(c0, c1)); \ +} \ + \ +inline void v_load_deinterleave( const tp* ptr, v_##tp##x2& a, v_##tp##x2& b, \ + v_##tp##x2& c, v_##tp##x2& d ) \ +{ \ + tp##x1_t a0 = vld1_##suffix(ptr); \ + tp##x1_t b0 = vld1_##suffix(ptr + 1); \ + tp##x1_t c0 = vld1_##suffix(ptr + 2); \ + tp##x1_t d0 = vld1_##suffix(ptr + 3); \ + tp##x1_t a1 = vld1_##suffix(ptr + 4); \ + tp##x1_t b1 = vld1_##suffix(ptr + 5); \ + tp##x1_t c1 = vld1_##suffix(ptr + 6); \ + tp##x1_t d1 = vld1_##suffix(ptr + 7); \ + a = v_##tp##x2(vcombine_##suffix(a0, a1)); \ + b = v_##tp##x2(vcombine_##suffix(b0, b1)); \ + c = v_##tp##x2(vcombine_##suffix(c0, c1)); \ + d = v_##tp##x2(vcombine_##suffix(d0, d1)); \ +} \ + \ +inline void v_store_interleave( tp* ptr, const v_##tp##x2& a, const v_##tp##x2& b, \ + hal::StoreMode /*mode*/=hal::STORE_UNALIGNED) \ +{ \ + vst1_##suffix(ptr, vget_low_##suffix(a.val)); \ + vst1_##suffix(ptr + 1, vget_low_##suffix(b.val)); \ + vst1_##suffix(ptr + 2, vget_high_##suffix(a.val)); \ + vst1_##suffix(ptr + 3, vget_high_##suffix(b.val)); \ +} \ + \ +inline void v_store_interleave( tp* ptr, const v_##tp##x2& a, \ + const v_##tp##x2& b, const v_##tp##x2& c, \ + hal::StoreMode /*mode*/=hal::STORE_UNALIGNED) \ +{ \ + vst1_##suffix(ptr, vget_low_##suffix(a.val)); \ + vst1_##suffix(ptr + 1, vget_low_##suffix(b.val)); \ + vst1_##suffix(ptr + 2, vget_low_##suffix(c.val)); \ + vst1_##suffix(ptr + 3, vget_high_##suffix(a.val)); \ + vst1_##suffix(ptr + 4, vget_high_##suffix(b.val)); \ + vst1_##suffix(ptr + 5, vget_high_##suffix(c.val)); \ +} \ + \ +inline void v_store_interleave( tp* ptr, const v_##tp##x2& a, const v_##tp##x2& b, \ + const v_##tp##x2& c, const v_##tp##x2& d, \ + hal::StoreMode /*mode*/=hal::STORE_UNALIGNED) \ +{ \ + vst1_##suffix(ptr, vget_low_##suffix(a.val)); \ + vst1_##suffix(ptr + 1, vget_low_##suffix(b.val)); \ + vst1_##suffix(ptr + 2, vget_low_##suffix(c.val)); \ + vst1_##suffix(ptr + 3, vget_low_##suffix(d.val)); \ + vst1_##suffix(ptr + 4, vget_high_##suffix(a.val)); \ + vst1_##suffix(ptr + 5, vget_high_##suffix(b.val)); \ + vst1_##suffix(ptr + 6, vget_high_##suffix(c.val)); \ + vst1_##suffix(ptr + 7, vget_high_##suffix(d.val)); \ +} + +OPENCV_HAL_IMPL_NEON_INTERLEAVED(uint8x16, uchar, u8) +OPENCV_HAL_IMPL_NEON_INTERLEAVED(int8x16, schar, s8) +OPENCV_HAL_IMPL_NEON_INTERLEAVED(uint16x8, ushort, u16) +OPENCV_HAL_IMPL_NEON_INTERLEAVED(int16x8, short, s16) +OPENCV_HAL_IMPL_NEON_INTERLEAVED(uint32x4, unsigned, u32) +OPENCV_HAL_IMPL_NEON_INTERLEAVED(int32x4, int, s32) +OPENCV_HAL_IMPL_NEON_INTERLEAVED(float32x4, float, f32) +#if CV_SIMD128_64F +OPENCV_HAL_IMPL_NEON_INTERLEAVED(float64x2, double, f64) +#endif + +OPENCV_HAL_IMPL_NEON_INTERLEAVED_INT64(int64, s64) +OPENCV_HAL_IMPL_NEON_INTERLEAVED_INT64(uint64, u64) + +inline v_float32x4 v_cvt_f32(const v_int32x4& a) +{ + return v_float32x4(vcvtq_f32_s32(a.val)); +} + +#if CV_SIMD128_64F +inline v_float32x4 v_cvt_f32(const v_float64x2& a) +{ + float32x2_t zero = vdup_n_f32(0.0f); + return v_float32x4(vcombine_f32(vcvt_f32_f64(a.val), zero)); +} + +inline v_float32x4 v_cvt_f32(const v_float64x2& a, const v_float64x2& b) +{ + return v_float32x4(vcombine_f32(vcvt_f32_f64(a.val), vcvt_f32_f64(b.val))); +} + +inline v_float64x2 v_cvt_f64(const v_int32x4& a) +{ + return v_float64x2(vcvt_f64_f32(vcvt_f32_s32(vget_low_s32(a.val)))); +} + +inline v_float64x2 v_cvt_f64_high(const v_int32x4& a) +{ + return v_float64x2(vcvt_f64_f32(vcvt_f32_s32(vget_high_s32(a.val)))); +} + +inline v_float64x2 v_cvt_f64(const v_float32x4& a) +{ + return v_float64x2(vcvt_f64_f32(vget_low_f32(a.val))); +} + +inline v_float64x2 v_cvt_f64_high(const v_float32x4& a) +{ + return v_float64x2(vcvt_f64_f32(vget_high_f32(a.val))); +} + +inline v_float64x2 v_cvt_f64(const v_int64x2& a) +{ return v_float64x2(vcvtq_f64_s64(a.val)); } + +#endif + +////////////// Lookup table access //////////////////// + +inline v_int8x16 v_lut(const schar* tab, const int* idx) +{ + schar CV_DECL_ALIGNED(32) elems[16] = + { + tab[idx[ 0]], + tab[idx[ 1]], + tab[idx[ 2]], + tab[idx[ 3]], + tab[idx[ 4]], + tab[idx[ 5]], + tab[idx[ 6]], + tab[idx[ 7]], + tab[idx[ 8]], + tab[idx[ 9]], + tab[idx[10]], + tab[idx[11]], + tab[idx[12]], + tab[idx[13]], + tab[idx[14]], + tab[idx[15]] + }; + return v_int8x16(vld1q_s8(elems)); +} +inline v_int8x16 v_lut_pairs(const schar* tab, const int* idx) +{ + schar CV_DECL_ALIGNED(32) elems[16] = + { + tab[idx[0]], + tab[idx[0] + 1], + tab[idx[1]], + tab[idx[1] + 1], + tab[idx[2]], + tab[idx[2] + 1], + tab[idx[3]], + tab[idx[3] + 1], + tab[idx[4]], + tab[idx[4] + 1], + tab[idx[5]], + tab[idx[5] + 1], + tab[idx[6]], + tab[idx[6] + 1], + tab[idx[7]], + tab[idx[7] + 1] + }; + return v_int8x16(vld1q_s8(elems)); +} +inline v_int8x16 v_lut_quads(const schar* tab, const int* idx) +{ + schar CV_DECL_ALIGNED(32) elems[16] = + { + tab[idx[0]], + tab[idx[0] + 1], + tab[idx[0] + 2], + tab[idx[0] + 3], + tab[idx[1]], + tab[idx[1] + 1], + tab[idx[1] + 2], + tab[idx[1] + 3], + tab[idx[2]], + tab[idx[2] + 1], + tab[idx[2] + 2], + tab[idx[2] + 3], + tab[idx[3]], + tab[idx[3] + 1], + tab[idx[3] + 2], + tab[idx[3] + 3] + }; + return v_int8x16(vld1q_s8(elems)); +} +inline v_uint8x16 v_lut(const uchar* tab, const int* idx) { return v_reinterpret_as_u8(v_lut((schar*)tab, idx)); } +inline v_uint8x16 v_lut_pairs(const uchar* tab, const int* idx) { return v_reinterpret_as_u8(v_lut_pairs((schar*)tab, idx)); } +inline v_uint8x16 v_lut_quads(const uchar* tab, const int* idx) { return v_reinterpret_as_u8(v_lut_quads((schar*)tab, idx)); } + +inline v_int16x8 v_lut(const short* tab, const int* idx) +{ + short CV_DECL_ALIGNED(32) elems[8] = + { + tab[idx[0]], + tab[idx[1]], + tab[idx[2]], + tab[idx[3]], + tab[idx[4]], + tab[idx[5]], + tab[idx[6]], + tab[idx[7]] + }; + return v_int16x8(vld1q_s16(elems)); +} +inline v_int16x8 v_lut_pairs(const short* tab, const int* idx) +{ + short CV_DECL_ALIGNED(32) elems[8] = + { + tab[idx[0]], + tab[idx[0] + 1], + tab[idx[1]], + tab[idx[1] + 1], + tab[idx[2]], + tab[idx[2] + 1], + tab[idx[3]], + tab[idx[3] + 1] + }; + return v_int16x8(vld1q_s16(elems)); +} +inline v_int16x8 v_lut_quads(const short* tab, const int* idx) +{ + return v_int16x8(vcombine_s16(vld1_s16(tab + idx[0]), vld1_s16(tab + idx[1]))); +} +inline v_uint16x8 v_lut(const ushort* tab, const int* idx) { return v_reinterpret_as_u16(v_lut((short*)tab, idx)); } +inline v_uint16x8 v_lut_pairs(const ushort* tab, const int* idx) { return v_reinterpret_as_u16(v_lut_pairs((short*)tab, idx)); } +inline v_uint16x8 v_lut_quads(const ushort* tab, const int* idx) { return v_reinterpret_as_u16(v_lut_quads((short*)tab, idx)); } + +inline v_int32x4 v_lut(const int* tab, const int* idx) +{ + int CV_DECL_ALIGNED(32) elems[4] = + { + tab[idx[0]], + tab[idx[1]], + tab[idx[2]], + tab[idx[3]] + }; + return v_int32x4(vld1q_s32(elems)); +} +inline v_int32x4 v_lut_pairs(const int* tab, const int* idx) +{ + return v_int32x4(vcombine_s32(vld1_s32(tab + idx[0]), vld1_s32(tab + idx[1]))); +} +inline v_int32x4 v_lut_quads(const int* tab, const int* idx) +{ + return v_int32x4(vld1q_s32(tab + idx[0])); +} +inline v_uint32x4 v_lut(const unsigned* tab, const int* idx) { return v_reinterpret_as_u32(v_lut((int*)tab, idx)); } +inline v_uint32x4 v_lut_pairs(const unsigned* tab, const int* idx) { return v_reinterpret_as_u32(v_lut_pairs((int*)tab, idx)); } +inline v_uint32x4 v_lut_quads(const unsigned* tab, const int* idx) { return v_reinterpret_as_u32(v_lut_quads((int*)tab, idx)); } + +inline v_int64x2 v_lut(const int64_t* tab, const int* idx) +{ + return v_int64x2(vcombine_s64(vcreate_s64(tab[idx[0]]), vcreate_s64(tab[idx[1]]))); +} +inline v_int64x2 v_lut_pairs(const int64_t* tab, const int* idx) +{ + return v_int64x2(vld1q_s64(tab + idx[0])); +} +inline v_uint64x2 v_lut(const uint64_t* tab, const int* idx) { return v_reinterpret_as_u64(v_lut((const int64_t *)tab, idx)); } +inline v_uint64x2 v_lut_pairs(const uint64_t* tab, const int* idx) { return v_reinterpret_as_u64(v_lut_pairs((const int64_t *)tab, idx)); } + +inline v_float32x4 v_lut(const float* tab, const int* idx) +{ + float CV_DECL_ALIGNED(32) elems[4] = + { + tab[idx[0]], + tab[idx[1]], + tab[idx[2]], + tab[idx[3]] + }; + return v_float32x4(vld1q_f32(elems)); +} +inline v_float32x4 v_lut_pairs(const float* tab, const int* idx) +{ + typedef uint64 CV_DECL_ALIGNED(1) unaligned_uint64; + + uint64 CV_DECL_ALIGNED(32) elems[2] = + { + *(unaligned_uint64*)(tab + idx[0]), + *(unaligned_uint64*)(tab + idx[1]) + }; + return v_float32x4(vreinterpretq_f32_u64(vld1q_u64(elems))); +} +inline v_float32x4 v_lut_quads(const float* tab, const int* idx) +{ + return v_float32x4(vld1q_f32(tab + idx[0])); +} + +inline v_int32x4 v_lut(const int* tab, const v_int32x4& idxvec) +{ + int CV_DECL_ALIGNED(32) elems[4] = + { + tab[vgetq_lane_s32(idxvec.val, 0)], + tab[vgetq_lane_s32(idxvec.val, 1)], + tab[vgetq_lane_s32(idxvec.val, 2)], + tab[vgetq_lane_s32(idxvec.val, 3)] + }; + return v_int32x4(vld1q_s32(elems)); +} + +inline v_uint32x4 v_lut(const unsigned* tab, const v_int32x4& idxvec) +{ + unsigned CV_DECL_ALIGNED(32) elems[4] = + { + tab[vgetq_lane_s32(idxvec.val, 0)], + tab[vgetq_lane_s32(idxvec.val, 1)], + tab[vgetq_lane_s32(idxvec.val, 2)], + tab[vgetq_lane_s32(idxvec.val, 3)] + }; + return v_uint32x4(vld1q_u32(elems)); +} + +inline v_float32x4 v_lut(const float* tab, const v_int32x4& idxvec) +{ + float CV_DECL_ALIGNED(32) elems[4] = + { + tab[vgetq_lane_s32(idxvec.val, 0)], + tab[vgetq_lane_s32(idxvec.val, 1)], + tab[vgetq_lane_s32(idxvec.val, 2)], + tab[vgetq_lane_s32(idxvec.val, 3)] + }; + return v_float32x4(vld1q_f32(elems)); +} + +inline void v_lut_deinterleave(const float* tab, const v_int32x4& idxvec, v_float32x4& x, v_float32x4& y) +{ + /*int CV_DECL_ALIGNED(32) idx[4]; + v_store(idx, idxvec); + + float32x4_t xy02 = vcombine_f32(vld1_f32(tab + idx[0]), vld1_f32(tab + idx[2])); + float32x4_t xy13 = vcombine_f32(vld1_f32(tab + idx[1]), vld1_f32(tab + idx[3])); + + float32x4x2_t xxyy = vuzpq_f32(xy02, xy13); + x = v_float32x4(xxyy.val[0]); + y = v_float32x4(xxyy.val[1]);*/ + int CV_DECL_ALIGNED(32) idx[4]; + v_store_aligned(idx, idxvec); + + x = v_float32x4(tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]]); + y = v_float32x4(tab[idx[0]+1], tab[idx[1]+1], tab[idx[2]+1], tab[idx[3]+1]); +} + +inline v_int8x16 v_interleave_pairs(const v_int8x16& vec) +{ + return v_int8x16(vcombine_s8(vtbl1_s8(vget_low_s8(vec.val), vcreate_s8(0x0705060403010200)), vtbl1_s8(vget_high_s8(vec.val), vcreate_s8(0x0705060403010200)))); +} +inline v_uint8x16 v_interleave_pairs(const v_uint8x16& vec) { return v_reinterpret_as_u8(v_interleave_pairs(v_reinterpret_as_s8(vec))); } +inline v_int8x16 v_interleave_quads(const v_int8x16& vec) +{ + return v_int8x16(vcombine_s8(vtbl1_s8(vget_low_s8(vec.val), vcreate_s8(0x0703060205010400)), vtbl1_s8(vget_high_s8(vec.val), vcreate_s8(0x0703060205010400)))); +} +inline v_uint8x16 v_interleave_quads(const v_uint8x16& vec) { return v_reinterpret_as_u8(v_interleave_quads(v_reinterpret_as_s8(vec))); } + +inline v_int16x8 v_interleave_pairs(const v_int16x8& vec) +{ + return v_int16x8(vreinterpretq_s16_s8(vcombine_s8(vtbl1_s8(vget_low_s8(vreinterpretq_s8_s16(vec.val)), vcreate_s8(0x0706030205040100)), vtbl1_s8(vget_high_s8(vreinterpretq_s8_s16(vec.val)), vcreate_s8(0x0706030205040100))))); +} +inline v_uint16x8 v_interleave_pairs(const v_uint16x8& vec) { return v_reinterpret_as_u16(v_interleave_pairs(v_reinterpret_as_s16(vec))); } +inline v_int16x8 v_interleave_quads(const v_int16x8& vec) +{ + int16x4x2_t res = vzip_s16(vget_low_s16(vec.val), vget_high_s16(vec.val)); + return v_int16x8(vcombine_s16(res.val[0], res.val[1])); +} +inline v_uint16x8 v_interleave_quads(const v_uint16x8& vec) { return v_reinterpret_as_u16(v_interleave_quads(v_reinterpret_as_s16(vec))); } + +inline v_int32x4 v_interleave_pairs(const v_int32x4& vec) +{ + int32x2x2_t res = vzip_s32(vget_low_s32(vec.val), vget_high_s32(vec.val)); + return v_int32x4(vcombine_s32(res.val[0], res.val[1])); +} +inline v_uint32x4 v_interleave_pairs(const v_uint32x4& vec) { return v_reinterpret_as_u32(v_interleave_pairs(v_reinterpret_as_s32(vec))); } +inline v_float32x4 v_interleave_pairs(const v_float32x4& vec) { return v_reinterpret_as_f32(v_interleave_pairs(v_reinterpret_as_s32(vec))); } + +inline v_int8x16 v_pack_triplets(const v_int8x16& vec) +{ + return v_int8x16(vextq_s8(vcombine_s8(vtbl1_s8(vget_low_s8(vec.val), vcreate_s8(0x0605040201000000)), vtbl1_s8(vget_high_s8(vec.val), vcreate_s8(0x0807060504020100))), vdupq_n_s8(0), 2)); +} +inline v_uint8x16 v_pack_triplets(const v_uint8x16& vec) { return v_reinterpret_as_u8(v_pack_triplets(v_reinterpret_as_s8(vec))); } + +inline v_int16x8 v_pack_triplets(const v_int16x8& vec) +{ + return v_int16x8(vreinterpretq_s16_s8(vextq_s8(vcombine_s8(vtbl1_s8(vget_low_s8(vreinterpretq_s8_s16(vec.val)), vcreate_s8(0x0504030201000000)), vget_high_s8(vreinterpretq_s8_s16(vec.val))), vdupq_n_s8(0), 2))); +} +inline v_uint16x8 v_pack_triplets(const v_uint16x8& vec) { return v_reinterpret_as_u16(v_pack_triplets(v_reinterpret_as_s16(vec))); } + +inline v_int32x4 v_pack_triplets(const v_int32x4& vec) { return vec; } +inline v_uint32x4 v_pack_triplets(const v_uint32x4& vec) { return vec; } +inline v_float32x4 v_pack_triplets(const v_float32x4& vec) { return vec; } + +#if CV_SIMD128_64F +inline v_float64x2 v_lut(const double* tab, const int* idx) +{ + double CV_DECL_ALIGNED(32) elems[2] = + { + tab[idx[0]], + tab[idx[1]] + }; + return v_float64x2(vld1q_f64(elems)); +} + +inline v_float64x2 v_lut_pairs(const double* tab, const int* idx) +{ + return v_float64x2(vld1q_f64(tab + idx[0])); +} + +inline v_float64x2 v_lut(const double* tab, const v_int32x4& idxvec) +{ + double CV_DECL_ALIGNED(32) elems[2] = + { + tab[vgetq_lane_s32(idxvec.val, 0)], + tab[vgetq_lane_s32(idxvec.val, 1)], + }; + return v_float64x2(vld1q_f64(elems)); +} + +inline void v_lut_deinterleave(const double* tab, const v_int32x4& idxvec, v_float64x2& x, v_float64x2& y) +{ + int CV_DECL_ALIGNED(32) idx[4]; + v_store_aligned(idx, idxvec); + + x = v_float64x2(tab[idx[0]], tab[idx[1]]); + y = v_float64x2(tab[idx[0]+1], tab[idx[1]+1]); +} +#endif + +////// FP16 support /////// +#if CV_FP16 +inline v_float32x4 v_load_expand(const float16_t* ptr) +{ + float16x4_t v = + #ifndef vld1_f16 // APPLE compiler defines vld1_f16 as macro + (float16x4_t)vld1_s16((const short*)ptr); + #else + vld1_f16((const __fp16*)ptr); + #endif + return v_float32x4(vcvt_f32_f16(v)); +} + +inline void v_pack_store(float16_t* ptr, const v_float32x4& v) +{ + float16x4_t hv = vcvt_f16_f32(v.val); + + #ifndef vst1_f16 // APPLE compiler defines vst1_f16 as macro + vst1_s16((short*)ptr, (int16x4_t)hv); + #else + vst1_f16((__fp16*)ptr, hv); + #endif +} +#else +inline v_float32x4 v_load_expand(const float16_t* ptr) +{ + const int N = 4; + float buf[N]; + for( int i = 0; i < N; i++ ) buf[i] = (float)ptr[i]; + return v_load(buf); +} + +inline void v_pack_store(float16_t* ptr, const v_float32x4& v) +{ + const int N = 4; + float buf[N]; + v_store(buf, v); + for( int i = 0; i < N; i++ ) ptr[i] = float16_t(buf[i]); +} +#endif + +inline void v_cleanup() {} + +CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END + +//! @endcond + +} + +#endif diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/hal/intrin_rvv.hpp b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/hal/intrin_rvv.hpp new file mode 100644 index 0000000..cb2140d --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/hal/intrin_rvv.hpp @@ -0,0 +1,2902 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +// The original implementation has been contributed by Yin Zhang. +// Copyright (C) 2020, Institute of Software, Chinese Academy of Sciences. + +#ifndef OPENCV_HAL_INTRIN_RVV_HPP +#define OPENCV_HAL_INTRIN_RVV_HPP + +#include + +namespace cv +{ + +CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN + +#define CV_SIMD128 1 +#define CV_SIMD128_64F 1 + +//////////// Unsupported native intrinsics in C++ //////////// + +struct vuint8mf2_t +{ + uchar val[8] = {0}; + vuint8mf2_t() {} + vuint8mf2_t(const uchar* ptr) + { + for (int i = 0; i < 8; ++i) + { + val[i] = ptr[i]; + } + } +}; +struct vint8mf2_t +{ + schar val[8] = {0}; + vint8mf2_t() {} + vint8mf2_t(const schar* ptr) + { + for (int i = 0; i < 8; ++i) + { + val[i] = ptr[i]; + } + } +}; +struct vuint16mf2_t +{ + ushort val[4] = {0}; + vuint16mf2_t() {} + vuint16mf2_t(const ushort* ptr) + { + for (int i = 0; i < 4; ++i) + { + val[i] = ptr[i]; + } + } +}; +struct vint16mf2_t +{ + short val[4] = {0}; + vint16mf2_t() {} + vint16mf2_t(const short* ptr) + { + for (int i = 0; i < 4; ++i) + { + val[i] = ptr[i]; + } + } +}; +struct vuint32mf2_t +{ + unsigned val[2] = {0}; + vuint32mf2_t() {} + vuint32mf2_t(const unsigned* ptr) + { + val[0] = ptr[0]; + val[1] = ptr[1]; + } +}; +struct vint32mf2_t +{ + int val[2] = {0}; + vint32mf2_t() {} + vint32mf2_t(const int* ptr) + { + val[0] = ptr[0]; + val[1] = ptr[1]; + } +}; +struct vfloat32mf2_t +{ + float val[2] = {0}; + vfloat32mf2_t() {} + vfloat32mf2_t(const float* ptr) + { + val[0] = ptr[0]; + val[1] = ptr[1]; + } +}; +struct vuint64mf2_t +{ + uint64 val[1] = {0}; + vuint64mf2_t() {} + vuint64mf2_t(const uint64* ptr) + { + val[0] = ptr[0]; + } +}; +struct vint64mf2_t +{ + int64 val[1] = {0}; + vint64mf2_t() {} + vint64mf2_t(const int64* ptr) + { + val[0] = ptr[0]; + } +}; +struct vfloat64mf2_t +{ + double val[1] = {0}; + vfloat64mf2_t() {} + vfloat64mf2_t(const double* ptr) + { + val[0] = ptr[0]; + } +}; +struct vuint8mf4_t +{ + uchar val[4] = {0}; + vuint8mf4_t() {} + vuint8mf4_t(const uchar* ptr) + { + for (int i = 0; i < 4; ++i) + { + val[i] = ptr[i]; + } + } +}; +struct vint8mf4_t +{ + schar val[4] = {0}; + vint8mf4_t() {} + vint8mf4_t(const schar* ptr) + { + for (int i = 0; i < 4; ++i) + { + val[i] = ptr[i]; + } + } +}; + +#define OPENCV_HAL_IMPL_RVV_NATIVE_LOADSTORE_MF2(_Tpvec, _Tp, suffix, width, n) \ +inline _Tpvec vle##width##_v_##suffix##mf2(const _Tp* ptr) \ +{ \ + return _Tpvec(ptr); \ +} \ +inline void vse##width##_v_##suffix##mf2(_Tp* ptr, _Tpvec v) \ +{ \ + for (int i = 0; i < n; ++i) \ + { \ + ptr[i] = v.val[i]; \ + } \ +} + +OPENCV_HAL_IMPL_RVV_NATIVE_LOADSTORE_MF2(vuint8mf2_t, uint8_t, u8, 8, 8) +OPENCV_HAL_IMPL_RVV_NATIVE_LOADSTORE_MF2(vint8mf2_t, int8_t, i8, 8, 8) +OPENCV_HAL_IMPL_RVV_NATIVE_LOADSTORE_MF2(vuint16mf2_t, uint16_t, u16, 16, 4) +OPENCV_HAL_IMPL_RVV_NATIVE_LOADSTORE_MF2(vint16mf2_t, int16_t, i16, 16, 4) +OPENCV_HAL_IMPL_RVV_NATIVE_LOADSTORE_MF2(vuint32mf2_t, uint32_t, u32, 32, 2) +OPENCV_HAL_IMPL_RVV_NATIVE_LOADSTORE_MF2(vint32mf2_t, int32_t, i32, 32, 2) +OPENCV_HAL_IMPL_RVV_NATIVE_LOADSTORE_MF2(vfloat32mf2_t, float32_t, f32, 32, 2) +OPENCV_HAL_IMPL_RVV_NATIVE_LOADSTORE_MF2(vuint64mf2_t, uint64_t, u64, 64, 1) +OPENCV_HAL_IMPL_RVV_NATIVE_LOADSTORE_MF2(vint64mf2_t, int64_t, i64, 64, 1) +OPENCV_HAL_IMPL_RVV_NATIVE_LOADSTORE_MF2(vfloat64mf2_t, float64_t, f64, 64, 1) + + +#define OPENCV_HAL_IMPL_RVV_NATIVE_WCVT(_Tpwvec, _Tpvec, _wTp, wcvt, suffix, width, n) \ +inline _Tpwvec wcvt (_Tpvec v) \ +{ \ + _wTp tmp[n]; \ + for (int i = 0; i < n; ++i) \ + { \ + tmp[i] = (_wTp)v.val[i]; \ + } \ + vsetvlmax_e##width##m1(); \ + return vle##width##_v_##suffix##m1(tmp); \ +} + +OPENCV_HAL_IMPL_RVV_NATIVE_WCVT(vuint16m1_t, vuint8mf2_t, ushort, vwcvtu_x_x_v_u16m1, u16, 16, 8) +OPENCV_HAL_IMPL_RVV_NATIVE_WCVT(vint16m1_t, vint8mf2_t, short, vwcvt_x_x_v_i16m1, i16, 16, 8) +OPENCV_HAL_IMPL_RVV_NATIVE_WCVT(vuint32m1_t, vuint16mf2_t, unsigned, vwcvtu_x_x_v_u32m1, u32, 32, 4) +OPENCV_HAL_IMPL_RVV_NATIVE_WCVT(vint32m1_t, vint16mf2_t, int, vwcvt_x_x_v_i32m1, i32, 32, 4) +OPENCV_HAL_IMPL_RVV_NATIVE_WCVT(vuint64m1_t, vuint32mf2_t, uint64, vwcvtu_x_x_v_u64m1, u64, 64, 2) +OPENCV_HAL_IMPL_RVV_NATIVE_WCVT(vint64m1_t, vint32mf2_t, int64, vwcvt_x_x_v_i64m1, i64, 64, 2) + +inline vuint8mf4_t vle8_v_u8mf4 (const uint8_t *base) +{ + return vuint8mf4_t(base); +} +inline vint8mf4_t vle8_v_i8mf4 (const int8_t *base) +{ + return vint8mf4_t(base); +} + +inline vuint16mf2_t vwcvtu_x_x_v_u16mf2 (vuint8mf4_t src) +{ + ushort tmp[4]; + for (int i = 0; i < 4; ++i) + { + tmp[i] = (ushort)src.val[i]; + } + return vle16_v_u16mf2(tmp); +} +inline vint16mf2_t vwcvt_x_x_v_i16mf2 (vint8mf4_t src) +{ + short tmp[4]; + for (int i = 0; i < 4; ++i) + { + tmp[i] = (short)src.val[i]; + } + return vle16_v_i16mf2(tmp); +} + +//////////// Types //////////// + +struct v_uint8x16 +{ + typedef uchar lane_type; + enum { nlanes = 16 }; + + v_uint8x16() {} + explicit v_uint8x16(vuint8m1_t v) + { + vsetvlmax_e8m1(); + vse8_v_u8m1(val, v); + } + v_uint8x16(uchar v0, uchar v1, uchar v2, uchar v3, uchar v4, uchar v5, uchar v6, uchar v7, + uchar v8, uchar v9, uchar v10, uchar v11, uchar v12, uchar v13, uchar v14, uchar v15) + { + uchar v[] = {v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15}; + for (int i = 0; i < nlanes; ++i) + { + val[i] = v[i]; + } + } + operator vuint8m1_t() const + { + vsetvlmax_e8m1(); + return vle8_v_u8m1(val); + } + uchar get0() const + { + return val[0]; + } + + uchar val[16]; +}; + +struct v_int8x16 +{ + typedef schar lane_type; + enum { nlanes = 16 }; + + v_int8x16() {} + explicit v_int8x16(vint8m1_t v) + { + vsetvlmax_e8m1(); + vse8_v_i8m1(val, v); + } + v_int8x16(schar v0, schar v1, schar v2, schar v3, schar v4, schar v5, schar v6, schar v7, + schar v8, schar v9, schar v10, schar v11, schar v12, schar v13, schar v14, schar v15) + { + schar v[] = {v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15}; + for (int i = 0; i < nlanes; ++i) + { + val[i] = v[i]; + } + } + operator vint8m1_t() const + { + vsetvlmax_e8m1(); + return vle8_v_i8m1(val); + } + schar get0() const + { + return val[0]; + } + + schar val[16]; +}; + +struct v_uint16x8 +{ + typedef ushort lane_type; + enum { nlanes = 8 }; + + v_uint16x8() {} + explicit v_uint16x8(vuint16m1_t v) + { + vsetvlmax_e16m1(); + vse16_v_u16m1(val, v); + } + v_uint16x8(ushort v0, ushort v1, ushort v2, ushort v3, ushort v4, ushort v5, ushort v6, ushort v7) + { + ushort v[] = {v0, v1, v2, v3, v4, v5, v6, v7}; + for (int i = 0; i < nlanes; ++i) + { + val[i] = v[i]; + } + } + operator vuint16m1_t() const + { + vsetvlmax_e16m1(); + return vle16_v_u16m1(val); + } + ushort get0() const + { + return val[0]; + } + + ushort val[8]; +}; + +struct v_int16x8 +{ + typedef short lane_type; + enum { nlanes = 8 }; + + v_int16x8() {} + explicit v_int16x8(vint16m1_t v) + { + vsetvlmax_e16m1(); + vse16_v_i16m1(val, v); + } + v_int16x8(short v0, short v1, short v2, short v3, short v4, short v5, short v6, short v7) + { + short v[] = {v0, v1, v2, v3, v4, v5, v6, v7}; + for (int i = 0; i < nlanes; ++i) + { + val[i] = v[i]; + } + } + operator vint16m1_t() const + { + vsetvlmax_e16m1(); + return vle16_v_i16m1(val); + } + short get0() const + { + return val[0]; + } + + short val[8]; +}; + +struct v_uint32x4 +{ + typedef unsigned lane_type; + enum { nlanes = 4 }; + + v_uint32x4() {} + explicit v_uint32x4(vuint32m1_t v) + { + vsetvlmax_e32m1(); + vse32_v_u32m1(val, v); + } + v_uint32x4(unsigned v0, unsigned v1, unsigned v2, unsigned v3) + { + unsigned v[] = {v0, v1, v2, v3}; + for (int i = 0; i < nlanes; ++i) + { + val[i] = v[i]; + } + } + operator vuint32m1_t() const + { + vsetvlmax_e32m1(); + return vle32_v_u32m1(val); + } + unsigned get0() const + { + return val[0]; + } + + unsigned val[4]; +}; + +struct v_int32x4 +{ + typedef int lane_type; + enum { nlanes = 4 }; + + v_int32x4() {} + explicit v_int32x4(vint32m1_t v) + { + vsetvlmax_e32m1(); + vse32_v_i32m1(val, v); + } + v_int32x4(int v0, int v1, int v2, int v3) + { + int v[] = {v0, v1, v2, v3}; + for (int i = 0; i < nlanes; ++i) + { + val[i] = v[i]; + } + } + operator vint32m1_t() const + { + vsetvlmax_e32m1(); + return vle32_v_i32m1(val); + } + int get0() const + { + return val[0]; + } + int val[4]; +}; + +struct v_float32x4 +{ + typedef float lane_type; + enum { nlanes = 4 }; + + v_float32x4() {} + explicit v_float32x4(vfloat32m1_t v) + { + vsetvlmax_e32m1(); + vse32_v_f32m1(val, v); + } + v_float32x4(float v0, float v1, float v2, float v3) + { + float v[] = {v0, v1, v2, v3}; + for (int i = 0; i < nlanes; ++i) + { + val[i] = v[i]; + } + } + operator vfloat32m1_t() const + { + vsetvlmax_e32m1(); + return vle32_v_f32m1(val); + } + float get0() const + { + return val[0]; + } + float val[4]; +}; + +struct v_uint64x2 +{ + typedef uint64 lane_type; + enum { nlanes = 2 }; + + v_uint64x2() {} + explicit v_uint64x2(vuint64m1_t v) + { + vsetvlmax_e64m1(); + vse64_v_u64m1(val, v); + } + v_uint64x2(uint64 v0, uint64 v1) + { + uint64 v[] = {v0, v1}; + for (int i = 0; i < nlanes; ++i) + { + val[i] = v[i]; + } + } + operator vuint64m1_t() const + { + vsetvlmax_e64m1(); + return vle64_v_u64m1(val); + } + uint64 get0() const + { + return val[0]; + } + + uint64 val[2]; +}; + +struct v_int64x2 +{ + typedef int64 lane_type; + enum { nlanes = 2 }; + + v_int64x2() {} + explicit v_int64x2(vint64m1_t v) + { + vsetvlmax_e64m1(); + vse64_v_i64m1(val, v); + } + v_int64x2(int64 v0, int64 v1) + { + int64 v[] = {v0, v1}; + for (int i = 0; i < nlanes; ++i) + { + val[i] = v[i]; + } + } + operator vint64m1_t() const + { + vsetvlmax_e64m1(); + return vle64_v_i64m1(val); + } + int64 get0() const + { + return val[0]; + } + + int64 val[2]; +}; + +#if CV_SIMD128_64F +struct v_float64x2 +{ + typedef double lane_type; + enum { nlanes = 2 }; + + v_float64x2() {} + explicit v_float64x2(vfloat64m1_t v) + { + vsetvlmax_e64m1(); + vse64_v_f64m1(val, v); + } + v_float64x2(double v0, double v1) + { + double v[] = {v0, v1}; + for (int i = 0; i < nlanes; ++i) + { + val[i] = v[i]; + } + } + operator vfloat64m1_t() const + { + vsetvlmax_e64m1(); + return vle64_v_f64m1(val); + } + double get0() const + { + return val[0]; + } + + double val[2]; +}; +#endif + + +//////////// Initial //////////// + +#define OPENCV_HAL_IMPL_RVV_INIT_INTEGER(_Tpvec, _Tp, width, suffix1, suffix2) \ +inline v_##_Tpvec v_setzero_##suffix1() \ +{ \ + vsetvlmax_e##width##m1(); \ + return v_##_Tpvec(vzero_##suffix2##m1()); \ +} \ +inline v_##_Tpvec v_setall_##suffix1(_Tp v) \ +{ \ + vsetvlmax_e##width##m1(); \ + return v_##_Tpvec(vmv_v_x_##suffix2##m1(v)); \ +} + +OPENCV_HAL_IMPL_RVV_INIT_INTEGER(uint8x16, uchar, 8, u8, u8) +OPENCV_HAL_IMPL_RVV_INIT_INTEGER(int8x16, schar, 8, s8, i8) +OPENCV_HAL_IMPL_RVV_INIT_INTEGER(uint16x8, ushort, 16, u16, u16) +OPENCV_HAL_IMPL_RVV_INIT_INTEGER(int16x8, short, 16, s16, i16) +OPENCV_HAL_IMPL_RVV_INIT_INTEGER(uint32x4, unsigned, 32, u32, u32) +OPENCV_HAL_IMPL_RVV_INIT_INTEGER(int32x4, int, 32, s32, i32) +OPENCV_HAL_IMPL_RVV_INIT_INTEGER(uint64x2, uint64, 64, u64, u64) +OPENCV_HAL_IMPL_RVV_INIT_INTEGER(int64x2, int64, 64, s64, i64) + +#define OPENCV_HAL_IMPL_RVV_INIT_FP(_Tpv, _Tp, width, suffix) \ +inline v_##_Tpv v_setzero_##suffix() \ +{ \ + vsetvlmax_e##width##m1(); \ + return v_##_Tpv(vzero_##suffix##m1()); \ +} \ +inline v_##_Tpv v_setall_##suffix(_Tp v) \ +{ \ + vsetvlmax_e##width##m1(); \ + return v_##_Tpv(vfmv_v_f_##suffix##m1(v)); \ +} + +OPENCV_HAL_IMPL_RVV_INIT_FP(float32x4, float, 32, f32) +#if CV_SIMD128_64F +OPENCV_HAL_IMPL_RVV_INIT_FP(float64x2, double, 64, f64) +#endif + +//////////// Reinterpret //////////// + +#define OPENCV_HAL_IMPL_RVV_SELF_REINTERPRET(_Tpvec, suffix) \ +inline v_##_Tpvec v_reinterpret_as_##suffix(const v_##_Tpvec& v) { return v; } + +OPENCV_HAL_IMPL_RVV_SELF_REINTERPRET(uint8x16, u8) +OPENCV_HAL_IMPL_RVV_SELF_REINTERPRET(int8x16, s8) +OPENCV_HAL_IMPL_RVV_SELF_REINTERPRET(uint16x8, u16) +OPENCV_HAL_IMPL_RVV_SELF_REINTERPRET(int16x8, s16) +OPENCV_HAL_IMPL_RVV_SELF_REINTERPRET(uint32x4, u32) +OPENCV_HAL_IMPL_RVV_SELF_REINTERPRET(int32x4, s32) +OPENCV_HAL_IMPL_RVV_SELF_REINTERPRET(float32x4, f32) +OPENCV_HAL_IMPL_RVV_SELF_REINTERPRET(uint64x2, u64) +OPENCV_HAL_IMPL_RVV_SELF_REINTERPRET(int64x2, s64) +#if CV_SIMD128_64F +OPENCV_HAL_IMPL_RVV_SELF_REINTERPRET(float64x2, f64) +#endif + +#define OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(_Tpvec1, _Tpvec2, _nTpvec1, _nTpvec2, suffix1, suffix2, nsuffix1, nsuffix2, width1, width2) \ +inline v_##_Tpvec1 v_reinterpret_as_##suffix1(const v_##_Tpvec2& v) \ +{ \ + vsetvlmax_e##width2##m1(); \ + return v_##_Tpvec1((_nTpvec1)vle##width2##_v_##nsuffix2##m1(v.val)); \ +} \ +inline v_##_Tpvec2 v_reinterpret_as_##suffix2(const v_##_Tpvec1& v) \ +{ \ + vsetvlmax_e##width1##m1(); \ + return v_##_Tpvec2((_nTpvec2)vle##width1##_v_##nsuffix1##m1(v.val)); \ +} + +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint8x16, int8x16, vuint8m1_t, vint8m1_t, u8, s8, u8, i8, 8, 8) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint16x8, int16x8, vuint16m1_t, vint16m1_t, u16, s16, u16, i16, 16, 16) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint32x4, int32x4, vuint32m1_t, vint32m1_t, u32, s32, u32, i32, 32, 32) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint32x4, float32x4, vuint32m1_t, vfloat32m1_t, u32, f32, u32, f32, 32, 32) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(int32x4, float32x4, vint32m1_t, vfloat32m1_t, s32, f32, i32, f32, 32, 32) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint64x2, int64x2, vuint64m1_t, vint64m1_t, u64, s64, u64, i64, 64, 64) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint8x16, uint16x8, vuint8m1_t, vuint16m1_t, u8, u16, u8, u16, 8, 16) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint8x16, uint32x4, vuint8m1_t, vuint32m1_t, u8, u32, u8, u32, 8, 32) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint8x16, uint64x2, vuint8m1_t, vuint64m1_t, u8, u64, u8, u64, 8, 64) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint16x8, uint32x4, vuint16m1_t, vuint32m1_t, u16, u32, u16, u32, 16, 32) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint16x8, uint64x2, vuint16m1_t, vuint64m1_t, u16, u64, u16, u64, 16, 64) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint32x4, uint64x2, vuint32m1_t, vuint64m1_t, u32, u64, u32, u64, 32, 64) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(int8x16, int16x8, vint8m1_t, vint16m1_t, s8, s16, i8, i16, 8, 16) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(int8x16, int32x4, vint8m1_t, vint32m1_t, s8, s32, i8, i32, 8, 32) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(int8x16, int64x2, vint8m1_t, vint64m1_t, s8, s64, i8, i64, 8, 64) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(int16x8, int32x4, vint16m1_t, vint32m1_t, s16, s32, i16, i32, 16, 32) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(int16x8, int64x2, vint16m1_t, vint64m1_t, s16, s64, i16, i64, 16, 64) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(int32x4, int64x2, vint32m1_t, vint64m1_t, s32, s64, i32, i64, 32, 64) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint8x16, int16x8, vuint8m1_t, vint16m1_t, u8, s16, u8, i16, 8, 16) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint8x16, int32x4, vuint8m1_t, vint32m1_t, u8, s32, u8, i32, 8, 32) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint8x16, int64x2, vuint8m1_t, vint64m1_t, u8, s64, u8, i64, 8, 64) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint16x8, int8x16, vuint16m1_t, vint8m1_t, u16, s8, u16, i8, 16, 8) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint16x8, int32x4, vuint16m1_t, vint32m1_t, u16, s32, u16, i32, 16, 32) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint16x8, int64x2, vuint16m1_t, vint64m1_t, u16, s64, u16, i64, 16, 64) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint32x4, int8x16, vuint32m1_t, vint8m1_t, u32, s8, u32, i8, 32, 8) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint32x4, int16x8, vuint32m1_t, vint16m1_t, u32, s16, u32, i16, 32, 16) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint32x4, int64x2, vuint32m1_t, vint64m1_t, u32, s64, u32, i64, 32, 64) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint64x2, int8x16, vuint64m1_t, vint8m1_t, u64, s8, u64, i8, 64, 8) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint64x2, int16x8, vuint64m1_t, vint16m1_t, u64, s16, u64, i16, 64, 16) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint64x2, int32x4, vuint64m1_t, vint32m1_t, u64, s32, u64, i32, 64, 32) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint8x16, float32x4, vuint8m1_t, vfloat32m1_t, u8, f32, u8, f32, 8, 32) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint16x8, float32x4, vuint16m1_t, vfloat32m1_t, u16, f32, u16, f32, 16, 32) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint64x2, float32x4, vuint64m1_t, vfloat32m1_t, u64, f32, u64, f32, 64, 32) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(int8x16, float32x4, vint8m1_t, vfloat32m1_t, s8, f32, i8, f32, 8, 32) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(int16x8, float32x4, vint16m1_t, vfloat32m1_t, s16, f32, i16, f32, 16, 32) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(int64x2, float32x4, vint64m1_t, vfloat32m1_t, s64, f32, i64, f32, 64, 32) +#if CV_SIMD128_64F +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint64x2, float64x2, vuint64m1_t, vfloat64m1_t, u64, f64, u64, f64, 64, 64) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(int64x2, float64x2, vint64m1_t, vfloat64m1_t, s64, f64, i64, f64, 64, 64) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint8x16, float64x2, vuint8m1_t, vfloat64m1_t, u8, f64, u8, f64, 8, 64) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint16x8, float64x2, vuint16m1_t, vfloat64m1_t, u16, f64, u16, f64, 16, 64) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint32x4, float64x2, vuint32m1_t, vfloat64m1_t, u32, f64, u32, f64, 32, 64) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(int8x16, float64x2, vint8m1_t, vfloat64m1_t, s8, f64, i8, f64, 8, 64) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(int16x8, float64x2, vint16m1_t, vfloat64m1_t, s16, f64, i16, f64, 16, 64) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(int32x4, float64x2, vint32m1_t, vfloat64m1_t, s32, f64, i32, f64, 32, 64) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(float32x4, float64x2, vfloat32m1_t, vfloat64m1_t, f32, f64, f32, f64, 32, 64) +#endif + +////////////// Extract ////////////// + +#define OPENCV_HAL_IMPL_RVV_EXTRACT(_Tpvec, _Tp, suffix, width, vmv) \ +template \ +inline _Tpvec v_extract(const _Tpvec& a, const _Tpvec& b) \ +{ \ + vsetvlmax_e##width##m1(); \ + return _Tpvec(vslideup_vx_##suffix##m1(vslidedown_vx_##suffix##m1(vzero_##suffix##m1(), a, s), b, _Tpvec::nlanes - s)); \ +} \ +template inline _Tp v_extract_n(_Tpvec v) \ +{ \ + vsetvlmax_e##width##m1(); \ + return _Tp(vmv(vslidedown_vx_##suffix##m1(vzero_##suffix##m1(), v, i))); \ +} + + +OPENCV_HAL_IMPL_RVV_EXTRACT(v_uint8x16, uchar, u8, 8, vmv_x_s_u8m1_u8) +OPENCV_HAL_IMPL_RVV_EXTRACT(v_int8x16, schar, i8, 8, vmv_x_s_i8m1_i8) +OPENCV_HAL_IMPL_RVV_EXTRACT(v_uint16x8, ushort, u16, 16, vmv_x_s_u16m1_u16) +OPENCV_HAL_IMPL_RVV_EXTRACT(v_int16x8, short, i16, 16, vmv_x_s_i16m1_i16) +OPENCV_HAL_IMPL_RVV_EXTRACT(v_uint32x4, uint, u32, 32, vmv_x_s_u32m1_u32) +OPENCV_HAL_IMPL_RVV_EXTRACT(v_int32x4, int, i32, 32, vmv_x_s_i32m1_i32) +OPENCV_HAL_IMPL_RVV_EXTRACT(v_uint64x2, uint64, u64, 64, vmv_x_s_u64m1_u64) +OPENCV_HAL_IMPL_RVV_EXTRACT(v_int64x2, int64, i64, 64, vmv_x_s_i64m1_i64) +OPENCV_HAL_IMPL_RVV_EXTRACT(v_float32x4, float, f32, 32, vfmv_f_s_f32m1_f32) +#if CV_SIMD128_64F +OPENCV_HAL_IMPL_RVV_EXTRACT(v_float64x2, double, f64, 64, vfmv_f_s_f64m1_f64) +#endif + +////////////// Load/Store ////////////// + +#define OPENCV_HAL_IMPL_RVV_LOADSTORE_OP(_Tpvec, _nTpvec, _Tp, hvl, width, suffix) \ +inline _Tpvec v_load(const _Tp* ptr) \ +{ \ + vsetvlmax_e8m1(); \ + return _Tpvec((_nTpvec)vle8_v_u8m1((uchar*)ptr)); \ +} \ +inline _Tpvec v_load_aligned(const _Tp* ptr) \ +{ \ + vsetvlmax_e##width##m1(); \ + return _Tpvec(vle##width##_v_##suffix##m1(ptr)); \ +} \ +inline _Tpvec v_load_low(const _Tp* ptr) \ +{ \ + vsetvl_e##width##m1(hvl); \ + _Tpvec res = _Tpvec(vle##width##_v_##suffix##m1(ptr)); \ + vsetvlmax_e##width##m1(); \ + return res; \ +} \ +inline void v_store(_Tp* ptr, const _Tpvec& a) \ +{ \ + vsetvlmax_e8m1(); \ + vse8_v_u8m1((uchar*)ptr, vle8_v_u8m1((uchar*)a.val)); \ +} \ +inline void v_store_aligned(_Tp* ptr, const _Tpvec& a) \ +{ \ + vsetvlmax_e##width##m1(); \ + vse##width##_v_##suffix##m1(ptr, a); \ +} \ +inline void v_store_aligned_nocache(_Tp* ptr, const _Tpvec& a) \ +{ \ + vsetvlmax_e##width##m1(); \ + vse##width##_v_##suffix##m1(ptr, a); \ +} \ +inline void v_store(_Tp* ptr, const _Tpvec& a, hal::StoreMode /*mode*/) \ +{ \ + vsetvlmax_e##width##m1(); \ + vse##width##_v_##suffix##m1(ptr, a); \ +} \ +inline void v_store_low(_Tp* ptr, const _Tpvec& a) \ +{ \ + _Tp CV_DECL_ALIGNED(32) tmp_ptr[_Tpvec::nlanes] = {0}; \ + vsetvlmax_e##width##m1(); \ + vse##width##_v_##suffix##m1(tmp_ptr, a); \ + for(int i = 0; i < _Tpvec::nlanes/2; ++i) \ + { \ + ptr[i] = tmp_ptr[i]; \ + } \ +} \ +inline void v_store_high(_Tp* ptr, const _Tpvec& a) \ +{ \ + _Tp CV_DECL_ALIGNED(32) tmp_ptr[_Tpvec::nlanes] = {0}; \ + vsetvlmax_e##width##m1(); \ + vse##width##_v_##suffix##m1(tmp_ptr, a); \ + for(int i = 0; i < _Tpvec::nlanes/2; ++i) \ + { \ + ptr[i] = tmp_ptr[i+_Tpvec::nlanes/2]; \ + } \ +} + +OPENCV_HAL_IMPL_RVV_LOADSTORE_OP(v_uint8x16, vuint8m1_t, uchar, 8, 8, u8) +OPENCV_HAL_IMPL_RVV_LOADSTORE_OP(v_int8x16, vint8m1_t, schar, 8, 8, i8) +OPENCV_HAL_IMPL_RVV_LOADSTORE_OP(v_uint16x8, vuint16m1_t, ushort, 4, 16, u16) +OPENCV_HAL_IMPL_RVV_LOADSTORE_OP(v_int16x8, vint16m1_t, short, 4, 16, i16) +OPENCV_HAL_IMPL_RVV_LOADSTORE_OP(v_uint32x4, vuint32m1_t, unsigned, 2, 32, u32) +OPENCV_HAL_IMPL_RVV_LOADSTORE_OP(v_int32x4, vint32m1_t, int, 2, 32, i32) +OPENCV_HAL_IMPL_RVV_LOADSTORE_OP(v_uint64x2, vuint64m1_t, uint64, 1, 64, u64) +OPENCV_HAL_IMPL_RVV_LOADSTORE_OP(v_int64x2, vint64m1_t, int64, 1, 64, i64) +OPENCV_HAL_IMPL_RVV_LOADSTORE_OP(v_float32x4, vfloat32m1_t, float, 2, 32, f32) +#if CV_SIMD128_64F +OPENCV_HAL_IMPL_RVV_LOADSTORE_OP(v_float64x2, vfloat64m1_t, double, 1, 64, f64) +#endif + +inline v_int8x16 v_load_halves(const schar* ptr0, const schar* ptr1) +{ + schar CV_DECL_ALIGNED(32) elems[16] = + { + ptr0[0], ptr0[1], ptr0[2], ptr0[3], ptr0[4], ptr0[5], ptr0[6], ptr0[7], + ptr1[0], ptr1[1], ptr1[2], ptr1[3], ptr1[4], ptr1[5], ptr1[6], ptr1[7] + }; + vsetvlmax_e8m1(); + return v_int8x16(vle8_v_i8m1(elems)); +} +inline v_uint8x16 v_load_halves(const uchar* ptr0, const uchar* ptr1) { return v_reinterpret_as_u8(v_load_halves((schar*)ptr0, (schar*)ptr1)); } + +inline v_int16x8 v_load_halves(const short* ptr0, const short* ptr1) +{ + short CV_DECL_ALIGNED(32) elems[8] = + { + ptr0[0], ptr0[1], ptr0[2], ptr0[3], ptr1[0], ptr1[1], ptr1[2], ptr1[3] + }; + vsetvlmax_e16m1(); + return v_int16x8(vle16_v_i16m1(elems)); +} +inline v_uint16x8 v_load_halves(const ushort* ptr0, const ushort* ptr1) { return v_reinterpret_as_u16(v_load_halves((short*)ptr0, (short*)ptr1)); } + +inline v_int32x4 v_load_halves(const int* ptr0, const int* ptr1) +{ + int CV_DECL_ALIGNED(32) elems[4] = + { + ptr0[0], ptr0[1], ptr1[0], ptr1[1] + }; + vsetvlmax_e32m1(); + return v_int32x4(vle32_v_i32m1(elems)); +} +inline v_float32x4 v_load_halves(const float* ptr0, const float* ptr1) +{ + float CV_DECL_ALIGNED(32) elems[4] = + { + ptr0[0], ptr0[1], ptr1[0], ptr1[1] + }; + vsetvlmax_e32m1(); + return v_float32x4(vle32_v_f32m1(elems)); +} +inline v_uint32x4 v_load_halves(const unsigned* ptr0, const unsigned* ptr1) { return v_reinterpret_as_u32(v_load_halves((int*)ptr0, (int*)ptr1)); } + +inline v_int64x2 v_load_halves(const int64* ptr0, const int64* ptr1) +{ + int64 CV_DECL_ALIGNED(32) elems[2] = + { + ptr0[0], ptr1[0] + }; + vsetvlmax_e64m1(); + return v_int64x2(vle64_v_i64m1(elems)); +} +inline v_uint64x2 v_load_halves(const uint64* ptr0, const uint64* ptr1) { return v_reinterpret_as_u64(v_load_halves((int64*)ptr0, (int64*)ptr1)); } + +#if CV_SIMD128_64F +inline v_float64x2 v_load_halves(const double* ptr0, const double* ptr1) +{ + double CV_DECL_ALIGNED(32) elems[2] = + { + ptr0[0], ptr1[0] + }; + vsetvlmax_e64m1(); + return v_float64x2(vle64_v_f64m1(elems)); +} +#endif + + +////////////// Lookup table access //////////////////// + +inline v_int8x16 v_lut(const schar* tab, const int* idx) +{ + schar CV_DECL_ALIGNED(32) elems[16] = + { + tab[idx[ 0]], + tab[idx[ 1]], + tab[idx[ 2]], + tab[idx[ 3]], + tab[idx[ 4]], + tab[idx[ 5]], + tab[idx[ 6]], + tab[idx[ 7]], + tab[idx[ 8]], + tab[idx[ 9]], + tab[idx[10]], + tab[idx[11]], + tab[idx[12]], + tab[idx[13]], + tab[idx[14]], + tab[idx[15]] + }; + vsetvlmax_e8m1(); + return v_int8x16(vle8_v_i8m1(elems)); +} +inline v_int8x16 v_lut_pairs(const schar* tab, const int* idx) +{ + schar CV_DECL_ALIGNED(32) elems[16] = + { + tab[idx[0]], + tab[idx[0] + 1], + tab[idx[1]], + tab[idx[1] + 1], + tab[idx[2]], + tab[idx[2] + 1], + tab[idx[3]], + tab[idx[3] + 1], + tab[idx[4]], + tab[idx[4] + 1], + tab[idx[5]], + tab[idx[5] + 1], + tab[idx[6]], + tab[idx[6] + 1], + tab[idx[7]], + tab[idx[7] + 1] + }; + vsetvlmax_e8m1(); + return v_int8x16(vle8_v_i8m1(elems)); +} +inline v_int8x16 v_lut_quads(const schar* tab, const int* idx) +{ + schar CV_DECL_ALIGNED(32) elems[16] = + { + tab[idx[0]], + tab[idx[0] + 1], + tab[idx[0] + 2], + tab[idx[0] + 3], + tab[idx[1]], + tab[idx[1] + 1], + tab[idx[1] + 2], + tab[idx[1] + 3], + tab[idx[2]], + tab[idx[2] + 1], + tab[idx[2] + 2], + tab[idx[2] + 3], + tab[idx[3]], + tab[idx[3] + 1], + tab[idx[3] + 2], + tab[idx[3] + 3] + }; + vsetvlmax_e8m1(); + return v_int8x16(vle8_v_i8m1(elems)); +} +inline v_uint8x16 v_lut(const uchar* tab, const int* idx) { return v_reinterpret_as_u8(v_lut((schar*)tab, idx)); } +inline v_uint8x16 v_lut_pairs(const uchar* tab, const int* idx) { return v_reinterpret_as_u8(v_lut_pairs((schar*)tab, idx)); } +inline v_uint8x16 v_lut_quads(const uchar* tab, const int* idx) { return v_reinterpret_as_u8(v_lut_quads((schar*)tab, idx)); } + +inline v_int16x8 v_lut(const short* tab, const int* idx) +{ + short CV_DECL_ALIGNED(32) elems[8] = + { + tab[idx[0]], + tab[idx[1]], + tab[idx[2]], + tab[idx[3]], + tab[idx[4]], + tab[idx[5]], + tab[idx[6]], + tab[idx[7]] + }; + vsetvlmax_e16m1(); + return v_int16x8(vle16_v_i16m1(elems)); +} +inline v_int16x8 v_lut_pairs(const short* tab, const int* idx) +{ + short CV_DECL_ALIGNED(32) elems[8] = + { + tab[idx[0]], + tab[idx[0] + 1], + tab[idx[1]], + tab[idx[1] + 1], + tab[idx[2]], + tab[idx[2] + 1], + tab[idx[3]], + tab[idx[3] + 1] + }; + vsetvlmax_e16m1(); + return v_int16x8(vle16_v_i16m1(elems)); +} +inline v_int16x8 v_lut_quads(const short* tab, const int* idx) +{ + short CV_DECL_ALIGNED(32) elems[8] = + { + tab[idx[0]], + tab[idx[0] + 1], + tab[idx[0] + 2], + tab[idx[0] + 3], + tab[idx[1]], + tab[idx[1] + 1], + tab[idx[1] + 2], + tab[idx[1] + 3] + }; + vsetvlmax_e16m1(); + return v_int16x8(vle16_v_i16m1(elems)); +} +inline v_uint16x8 v_lut(const ushort* tab, const int* idx) { return v_reinterpret_as_u16(v_lut((short*)tab, idx)); } +inline v_uint16x8 v_lut_pairs(const ushort* tab, const int* idx) { return v_reinterpret_as_u16(v_lut_pairs((short*)tab, idx)); } +inline v_uint16x8 v_lut_quads(const ushort* tab, const int* idx) { return v_reinterpret_as_u16(v_lut_quads((short*)tab, idx)); } + +inline v_int32x4 v_lut(const int* tab, const int* idx) +{ + int CV_DECL_ALIGNED(32) elems[4] = + { + tab[idx[0]], + tab[idx[1]], + tab[idx[2]], + tab[idx[3]] + }; + vsetvlmax_e32m1(); + return v_int32x4(vle32_v_i32m1(elems)); +} +inline v_int32x4 v_lut_pairs(const int* tab, const int* idx) +{ + int CV_DECL_ALIGNED(32) elems[4] = + { + tab[idx[0]], + tab[idx[0] + 1], + tab[idx[1]], + tab[idx[1] + 1] + }; + vsetvlmax_e32m1(); + return v_int32x4(vle32_v_i32m1(elems)); +} +inline v_int32x4 v_lut_quads(const int* tab, const int* idx) +{ + vsetvlmax_e32m1(); + return v_int32x4(vle32_v_i32m1(tab + idx[0])); +} + +inline v_uint32x4 v_lut(const unsigned* tab, const int* idx) { return v_reinterpret_as_u32(v_lut((int*)tab, idx)); } +inline v_uint32x4 v_lut_pairs(const unsigned* tab, const int* idx) { return v_reinterpret_as_u32(v_lut_pairs((int*)tab, idx)); } +inline v_uint32x4 v_lut_quads(const unsigned* tab, const int* idx) { return v_reinterpret_as_u32(v_lut_quads((int*)tab, idx)); } + +inline v_int64x2 v_lut(const int64_t* tab, const int* idx) +{ + int64_t CV_DECL_ALIGNED(32) elems[2] = + { + tab[idx[0]], + tab[idx[1]] + }; + vsetvlmax_e64m1(); + return v_int64x2(vle64_v_i64m1(elems)); +} +inline v_int64x2 v_lut_pairs(const int64* tab, const int* idx) +{ + vsetvlmax_e64m1(); + return v_int64x2(vle64_v_i64m1(tab + idx[0])); +} +inline v_uint64x2 v_lut(const uint64* tab, const int* idx) { return v_reinterpret_as_u64(v_lut((const int64_t *)tab, idx)); } +inline v_uint64x2 v_lut_pairs(const uint64* tab, const int* idx) { return v_reinterpret_as_u64(v_lut_pairs((const int64_t *)tab, idx)); } + +inline v_float32x4 v_lut(const float* tab, const int* idx) +{ + float CV_DECL_ALIGNED(32) elems[4] = + { + tab[idx[0]], + tab[idx[1]], + tab[idx[2]], + tab[idx[3]] + }; + vsetvlmax_e32m1(); + return v_float32x4(vle32_v_f32m1(elems)); +} +inline v_float32x4 v_lut_pairs(const float* tab, const int* idx) +{ + float CV_DECL_ALIGNED(32) elems[4] = + { + tab[idx[0]], + tab[idx[0] + 1], + tab[idx[1]], + tab[idx[1] + 1] + }; + vsetvlmax_e32m1(); + return v_float32x4(vle32_v_f32m1(elems)); +} +inline v_float32x4 v_lut_quads(const float* tab, const int* idx) +{ + vsetvlmax_e32m1(); + return v_float32x4(vle32_v_f32m1(tab + idx[0])); +} + +inline v_int32x4 v_lut(const int* tab, const v_int32x4& idxvec) +{ + int CV_DECL_ALIGNED(32) elems[4] = + { + tab[v_extract_n<0>(idxvec)], + tab[v_extract_n<1>(idxvec)], + tab[v_extract_n<2>(idxvec)], + tab[v_extract_n<3>(idxvec)] + }; + vsetvlmax_e32m1(); + return v_int32x4(vle32_v_i32m1(elems)); +} + +inline v_uint32x4 v_lut(const unsigned* tab, const v_int32x4& idxvec) +{ + unsigned CV_DECL_ALIGNED(32) elems[4] = + { + tab[v_extract_n<0>(idxvec)], + tab[v_extract_n<1>(idxvec)], + tab[v_extract_n<2>(idxvec)], + tab[v_extract_n<3>(idxvec)] + }; + vsetvlmax_e32m1(); + return v_uint32x4(vle32_v_u32m1(elems)); +} + +inline v_float32x4 v_lut(const float* tab, const v_int32x4& idxvec) +{ + float CV_DECL_ALIGNED(32) elems[4] = + { + tab[v_extract_n<0>(idxvec)], + tab[v_extract_n<1>(idxvec)], + tab[v_extract_n<2>(idxvec)], + tab[v_extract_n<3>(idxvec)] + }; + vsetvlmax_e32m1(); + return v_float32x4(vle32_v_f32m1(elems)); +} + +inline void v_lut_deinterleave(const float* tab, const v_int32x4& idxvec, v_float32x4& x, v_float32x4& y) +{ + int CV_DECL_ALIGNED(32) idx[4]; + v_store_aligned(idx, idxvec); + + x = v_float32x4(tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]]); + y = v_float32x4(tab[idx[0]+1], tab[idx[1]+1], tab[idx[2]+1], tab[idx[3]+1]); +} + +#if CV_SIMD128_64F +inline v_float64x2 v_lut(const double* tab, const int* idx) +{ + double CV_DECL_ALIGNED(32) elems[2] = + { + tab[idx[0]], + tab[idx[1]] + }; + vsetvlmax_e64m1(); + return v_float64x2(vle64_v_f64m1(elems)); +} + +inline v_float64x2 v_lut_pairs(const double* tab, const int* idx) +{ + vsetvlmax_e64m1(); + return v_float64x2(vle64_v_f64m1(tab + idx[0])); +} + +inline v_float64x2 v_lut(const double* tab, const v_int32x4& idxvec) +{ + double CV_DECL_ALIGNED(32) elems[2] = + { + tab[v_extract_n<0>(idxvec)], + tab[v_extract_n<1>(idxvec)] + }; + vsetvlmax_e64m1(); + return v_float64x2(vle64_v_f64m1(elems)); +} + +inline void v_lut_deinterleave(const double* tab, const v_int32x4& idxvec, v_float64x2& x, v_float64x2& y) +{ + int CV_DECL_ALIGNED(32) idx[4] = {0}; + v_store_aligned(idx, idxvec); + + x = v_float64x2(tab[idx[0]], tab[idx[1]]); + y = v_float64x2(tab[idx[0]+1], tab[idx[1]+1]); +} +#endif + +////////////// Pack boolean //////////////////// + +inline v_uint8x16 v_pack_b(const v_uint16x8& a, const v_uint16x8& b) +{ + ushort CV_DECL_ALIGNED(32) ptr[16] = {0}; + v_store(ptr, a); + v_store(ptr + 8, b); + vsetvlmax_e8m1(); + return v_uint8x16(vnsrl_wx_u8m1(vle16_v_u16m2(ptr), 0)); +} + +inline v_uint8x16 v_pack_b(const v_uint32x4& a, const v_uint32x4& b, + const v_uint32x4& c, const v_uint32x4& d) +{ + unsigned CV_DECL_ALIGNED(32) ptr[16] = {0}; + v_store(ptr, a); + v_store(ptr + 4, b); + v_store(ptr + 8, c); + v_store(ptr + 12, d); + vsetvlmax_e8m1(); + return v_uint8x16(vnsrl_wx_u8m1(vnsrl_wx_u16m2(vle32_v_u32m4(ptr), 0), 0)); +} + +inline v_uint8x16 v_pack_b(const v_uint64x2& a, const v_uint64x2& b, const v_uint64x2& c, + const v_uint64x2& d, const v_uint64x2& e, const v_uint64x2& f, + const v_uint64x2& g, const v_uint64x2& h) +{ + uint64 CV_DECL_ALIGNED(32) ptr[16] = {0}; + v_store(ptr, a); + v_store(ptr + 2, b); + v_store(ptr + 4, c); + v_store(ptr + 6, d); + v_store(ptr + 8, e); + v_store(ptr + 10, f); + v_store(ptr + 12, g); + v_store(ptr + 14, h); + vsetvlmax_e8m1(); + return v_uint8x16(vnsrl_wx_u8m1(vnsrl_wx_u16m2(vnsrl_wx_u32m4(vle64_v_u64m8(ptr), 0), 0), 0)); +} + +////////////// Arithmetics ////////////// +#define OPENCV_HAL_IMPL_RVV_BIN_OP(bin_op, _Tpvec, intrin, width) \ +inline _Tpvec operator bin_op (const _Tpvec& a, const _Tpvec& b) \ +{ \ + vsetvlmax_e##width##m1(); \ + return _Tpvec(intrin(a, b)); \ +} \ +inline _Tpvec& operator bin_op##= (_Tpvec& a, const _Tpvec& b) \ +{ \ + vsetvlmax_e##width##m1(); \ + a = _Tpvec(intrin(a, b)); \ + return a; \ +} + +OPENCV_HAL_IMPL_RVV_BIN_OP(+, v_uint8x16, vsaddu_vv_u8m1, 8) +OPENCV_HAL_IMPL_RVV_BIN_OP(-, v_uint8x16, vssubu_vv_u8m1, 8) +OPENCV_HAL_IMPL_RVV_BIN_OP(/, v_uint8x16, vdivu_vv_u8m1, 8) +OPENCV_HAL_IMPL_RVV_BIN_OP(+, v_int8x16, vsadd_vv_i8m1, 8) +OPENCV_HAL_IMPL_RVV_BIN_OP(-, v_int8x16, vssub_vv_i8m1, 8) +OPENCV_HAL_IMPL_RVV_BIN_OP(/, v_int8x16, vdiv_vv_i8m1, 8) +OPENCV_HAL_IMPL_RVV_BIN_OP(+, v_uint16x8, vsaddu_vv_u16m1, 16) +OPENCV_HAL_IMPL_RVV_BIN_OP(-, v_uint16x8, vssubu_vv_u16m1, 16) +OPENCV_HAL_IMPL_RVV_BIN_OP(/, v_uint16x8, vdivu_vv_u16m1, 16) +OPENCV_HAL_IMPL_RVV_BIN_OP(+, v_int16x8, vsadd_vv_i16m1, 16) +OPENCV_HAL_IMPL_RVV_BIN_OP(-, v_int16x8, vssub_vv_i16m1, 16) +OPENCV_HAL_IMPL_RVV_BIN_OP(/, v_int16x8, vdiv_vv_i16m1, 16) +OPENCV_HAL_IMPL_RVV_BIN_OP(+, v_uint32x4, vadd_vv_u32m1, 32) +OPENCV_HAL_IMPL_RVV_BIN_OP(-, v_uint32x4, vsub_vv_u32m1, 32) +OPENCV_HAL_IMPL_RVV_BIN_OP(*, v_uint32x4, vmul_vv_u32m1, 32) +OPENCV_HAL_IMPL_RVV_BIN_OP(/, v_uint32x4, vdivu_vv_u32m1, 32) +OPENCV_HAL_IMPL_RVV_BIN_OP(+, v_int32x4, vadd_vv_i32m1, 32) +OPENCV_HAL_IMPL_RVV_BIN_OP(-, v_int32x4, vsub_vv_i32m1, 32) +OPENCV_HAL_IMPL_RVV_BIN_OP(*, v_int32x4, vmul_vv_i32m1, 32) +OPENCV_HAL_IMPL_RVV_BIN_OP(/, v_int32x4, vdiv_vv_i32m1, 32) +OPENCV_HAL_IMPL_RVV_BIN_OP(+, v_float32x4, vfadd_vv_f32m1, 32) +OPENCV_HAL_IMPL_RVV_BIN_OP(-, v_float32x4, vfsub_vv_f32m1, 32) +OPENCV_HAL_IMPL_RVV_BIN_OP(*, v_float32x4, vfmul_vv_f32m1, 32) +OPENCV_HAL_IMPL_RVV_BIN_OP(/, v_float32x4, vfdiv_vv_f32m1, 32) +OPENCV_HAL_IMPL_RVV_BIN_OP(+, v_uint64x2, vadd_vv_u64m1, 64) +OPENCV_HAL_IMPL_RVV_BIN_OP(-, v_uint64x2, vsub_vv_u64m1, 64) +OPENCV_HAL_IMPL_RVV_BIN_OP(*, v_uint64x2, vmul_vv_u64m1, 64) +OPENCV_HAL_IMPL_RVV_BIN_OP(/, v_uint64x2, vdivu_vv_u64m1, 64) +OPENCV_HAL_IMPL_RVV_BIN_OP(+, v_int64x2, vadd_vv_i64m1, 64) +OPENCV_HAL_IMPL_RVV_BIN_OP(-, v_int64x2, vsub_vv_i64m1, 64) +OPENCV_HAL_IMPL_RVV_BIN_OP(*, v_int64x2, vmul_vv_i64m1, 64) +OPENCV_HAL_IMPL_RVV_BIN_OP(/, v_int64x2, vdiv_vv_i64m1, 64) +#if CV_SIMD128_64F +OPENCV_HAL_IMPL_RVV_BIN_OP(+, v_float64x2, vfadd_vv_f64m1, 64) +OPENCV_HAL_IMPL_RVV_BIN_OP(-, v_float64x2, vfsub_vv_f64m1, 64) +OPENCV_HAL_IMPL_RVV_BIN_OP(*, v_float64x2, vfmul_vv_f64m1, 64) +OPENCV_HAL_IMPL_RVV_BIN_OP(/, v_float64x2, vfdiv_vv_f64m1, 64) +#endif + + +////////////// Bitwise logic ////////////// + +#define OPENCV_HAL_IMPL_RVV_LOGIC_OP(_Tpvec, suffix, width) \ +OPENCV_HAL_IMPL_RVV_BIN_OP(&, _Tpvec, vand_vv_##suffix##m1, width) \ +OPENCV_HAL_IMPL_RVV_BIN_OP(|, _Tpvec, vor_vv_##suffix##m1, width) \ +OPENCV_HAL_IMPL_RVV_BIN_OP(^, _Tpvec, vxor_vv_##suffix##m1, width) \ +inline _Tpvec operator ~ (const _Tpvec& a) \ +{ \ + vsetvlmax_e##width##m1(); \ + return _Tpvec(vnot_v_##suffix##m1(a)); \ +} + +OPENCV_HAL_IMPL_RVV_LOGIC_OP(v_uint8x16, u8, 8) +OPENCV_HAL_IMPL_RVV_LOGIC_OP(v_int8x16, i8, 8) +OPENCV_HAL_IMPL_RVV_LOGIC_OP(v_uint16x8, u16, 16) +OPENCV_HAL_IMPL_RVV_LOGIC_OP(v_int16x8, i16, 16) +OPENCV_HAL_IMPL_RVV_LOGIC_OP(v_uint32x4, u32, 32) +OPENCV_HAL_IMPL_RVV_LOGIC_OP(v_int32x4, i32, 32) +OPENCV_HAL_IMPL_RVV_LOGIC_OP(v_uint64x2, u64, 64) +OPENCV_HAL_IMPL_RVV_LOGIC_OP(v_int64x2, i64, 64) + +#define OPENCV_HAL_IMPL_RVV_FLT_BIT_OP(bin_op, intrin) \ +inline v_float32x4 operator bin_op (const v_float32x4& a, const v_float32x4& b) \ +{ \ + vsetvlmax_e32m1(); \ + return v_float32x4(vreinterpret_v_i32m1_f32m1(intrin(vreinterpret_v_f32m1_i32m1(a), vreinterpret_v_f32m1_i32m1(b)))); \ +} \ +inline v_float32x4& operator bin_op##= (v_float32x4& a, const v_float32x4& b) \ +{ \ + vsetvlmax_e32m1(); \ + a = v_float32x4(vreinterpret_v_i32m1_f32m1(intrin(vreinterpret_v_f32m1_i32m1(a), vreinterpret_v_f32m1_i32m1(b)))); \ + return a; \ +} + +OPENCV_HAL_IMPL_RVV_FLT_BIT_OP(&, vand_vv_i32m1) +OPENCV_HAL_IMPL_RVV_FLT_BIT_OP(|, vor_vv_i32m1) +OPENCV_HAL_IMPL_RVV_FLT_BIT_OP(^, vxor_vv_i32m1) + +inline v_float32x4 operator ~ (const v_float32x4& a) +{ + vsetvlmax_e32m1(); + return v_float32x4(vreinterpret_v_i32m1_f32m1(vnot_v_i32m1(vreinterpret_v_f32m1_i32m1(a)))); +} + +#if CV_SIMD128_64F +#define OPENCV_HAL_IMPL_RVV_FLT64_BIT_OP(bin_op, intrin) \ +inline v_float64x2 operator bin_op (const v_float64x2& a, const v_float64x2& b) \ +{ \ + vsetvlmax_e64m1(); \ + return v_float64x2(vreinterpret_v_i64m1_f64m1(intrin(vreinterpret_v_f64m1_i64m1(a), vreinterpret_v_f64m1_i64m1(b)))); \ +} \ +inline v_float64x2& operator bin_op##= (v_float64x2& a, const v_float64x2& b) \ +{ \ + vsetvlmax_e64m1(); \ + a = v_float64x2(vreinterpret_v_i64m1_f64m1(intrin(vreinterpret_v_f64m1_i64m1(a), vreinterpret_v_f64m1_i64m1(b)))); \ + return a; \ +} + +OPENCV_HAL_IMPL_RVV_FLT64_BIT_OP(&, vand_vv_i64m1) +OPENCV_HAL_IMPL_RVV_FLT64_BIT_OP(|, vor_vv_i64m1) +OPENCV_HAL_IMPL_RVV_FLT64_BIT_OP(^, vxor_vv_i64m1) + +inline v_float64x2 operator ~ (const v_float64x2& a) +{ + vsetvlmax_e64m1(); + return v_float64x2(vreinterpret_v_i64m1_f64m1(vnot_v_i64m1(vreinterpret_v_f64m1_i64m1(a)))); +} +#endif + +////////////// Bitwise shifts ////////////// + +#define OPENCV_HAL_IMPL_RVV_UNSIGNED_SHIFT_OP(_Tpvec, suffix, width) \ +inline _Tpvec operator << (const _Tpvec& a, int n) \ +{ \ + vsetvlmax_e##width##m1(); \ + return _Tpvec(vsll_vx_##suffix##m1(a, uint8_t(n))); \ +} \ +inline _Tpvec operator >> (const _Tpvec& a, int n) \ +{ \ + vsetvlmax_e##width##m1(); \ + return _Tpvec(vsrl_vx_##suffix##m1(a, uint8_t(n))); \ +} \ +template inline _Tpvec v_shl(const _Tpvec& a) \ +{ \ + vsetvlmax_e##width##m1(); \ + return _Tpvec(vsll_vx_##suffix##m1(a, uint8_t(n))); \ +} \ +template inline _Tpvec v_shr(const _Tpvec& a) \ +{ \ + vsetvlmax_e##width##m1(); \ + return _Tpvec(vsrl_vx_##suffix##m1(a, uint8_t(n))); \ +} + +#define OPENCV_HAL_IMPL_RVV_SIGNED_SHIFT_OP(_Tpvec, suffix, width) \ +inline _Tpvec operator << (const _Tpvec& a, int n) \ +{ \ + vsetvlmax_e##width##m1(); \ + return _Tpvec(vsll_vx_##suffix##m1(a, uint8_t(n))); \ +} \ +inline _Tpvec operator >> (const _Tpvec& a, int n) \ +{ \ + vsetvlmax_e##width##m1(); \ + return _Tpvec(vsra_vx_##suffix##m1(a, uint8_t(n))); \ +} \ +template inline _Tpvec v_shl(const _Tpvec& a) \ +{ \ + vsetvlmax_e##width##m1(); \ + return _Tpvec(vsll_vx_##suffix##m1(a, uint8_t(n))); \ +} \ +template inline _Tpvec v_shr(const _Tpvec& a) \ +{ \ + vsetvlmax_e##width##m1(); \ + return _Tpvec(vsra_vx_##suffix##m1(a, uint8_t(n))); \ +} + +OPENCV_HAL_IMPL_RVV_UNSIGNED_SHIFT_OP(v_uint8x16, u8, 8) +OPENCV_HAL_IMPL_RVV_UNSIGNED_SHIFT_OP(v_uint16x8, u16, 16) +OPENCV_HAL_IMPL_RVV_UNSIGNED_SHIFT_OP(v_uint32x4, u32, 32) +OPENCV_HAL_IMPL_RVV_UNSIGNED_SHIFT_OP(v_uint64x2, u64, 64) +OPENCV_HAL_IMPL_RVV_SIGNED_SHIFT_OP(v_int8x16, i8, 8) +OPENCV_HAL_IMPL_RVV_SIGNED_SHIFT_OP(v_int16x8, i16, 16) +OPENCV_HAL_IMPL_RVV_SIGNED_SHIFT_OP(v_int32x4, i32, 32) +OPENCV_HAL_IMPL_RVV_SIGNED_SHIFT_OP(v_int64x2, i64, 64) + + +////////////// Comparison ////////////// + +#define OPENCV_HAL_IMPL_RVV_INT_CMP_OP(_Tpvec, op, intrin, suffix, width) \ +inline _Tpvec operator op (const _Tpvec& a, const _Tpvec& b) \ +{ \ + vsetvlmax_e##width##m1(); \ + return _Tpvec(vmerge_vxm_##suffix##m1(intrin(a, b), vzero_##suffix##m1(), 1)); \ +} + +#define OPENCV_HAL_IMPL_RVV_FLOAT_CMP_OP(_Tpvec, op, intrin, suffix, width) \ +inline _Tpvec operator op (const _Tpvec& a, const _Tpvec& b) \ +{ \ + vsetvlmax_e##width##m1(); \ + return _Tpvec(vfmerge_vfm_##suffix##m1(intrin(a, b), vzero_##suffix##m1(), 1)); \ +} + +#define OPENCV_HAL_IMPL_RVV_UNSIGNED_CMP(_Tpvec, suffix, width) \ +OPENCV_HAL_IMPL_RVV_INT_CMP_OP(_Tpvec, ==, vmseq_vv_##suffix##m1_b##width, suffix, width) \ +OPENCV_HAL_IMPL_RVV_INT_CMP_OP(_Tpvec, !=, vmsne_vv_##suffix##m1_b##width, suffix, width) \ +OPENCV_HAL_IMPL_RVV_INT_CMP_OP(_Tpvec, <, vmsltu_vv_##suffix##m1_b##width, suffix, width) \ +OPENCV_HAL_IMPL_RVV_INT_CMP_OP(_Tpvec, >, vmsgtu_vv_##suffix##m1_b##width, suffix, width) \ +OPENCV_HAL_IMPL_RVV_INT_CMP_OP(_Tpvec, <=, vmsleu_vv_##suffix##m1_b##width, suffix, width) \ +OPENCV_HAL_IMPL_RVV_INT_CMP_OP(_Tpvec, >=, vmsgeu_vv_##suffix##m1_b##width, suffix, width) + +#define OPENCV_HAL_IMPL_RVV_SIGNED_CMP(_Tpvec, suffix, width) \ +OPENCV_HAL_IMPL_RVV_INT_CMP_OP(_Tpvec, ==, vmseq_vv_##suffix##m1_b##width, suffix, width) \ +OPENCV_HAL_IMPL_RVV_INT_CMP_OP(_Tpvec, !=, vmsne_vv_##suffix##m1_b##width, suffix, width) \ +OPENCV_HAL_IMPL_RVV_INT_CMP_OP(_Tpvec, <, vmslt_vv_##suffix##m1_b##width, suffix, width) \ +OPENCV_HAL_IMPL_RVV_INT_CMP_OP(_Tpvec, >, vmsgt_vv_##suffix##m1_b##width, suffix, width) \ +OPENCV_HAL_IMPL_RVV_INT_CMP_OP(_Tpvec, <=, vmsle_vv_##suffix##m1_b##width, suffix, width) \ +OPENCV_HAL_IMPL_RVV_INT_CMP_OP(_Tpvec, >=, vmsge_vv_##suffix##m1_b##width, suffix, width) + +#define OPENCV_HAL_IMPL_RVV_FLOAT_CMP(_Tpvec, suffix, width) \ +OPENCV_HAL_IMPL_RVV_FLOAT_CMP_OP(_Tpvec, ==, vmfeq_vv_##suffix##m1_b##width, suffix, width) \ +OPENCV_HAL_IMPL_RVV_FLOAT_CMP_OP(_Tpvec, !=, vmfne_vv_##suffix##m1_b##width, suffix, width) \ +OPENCV_HAL_IMPL_RVV_FLOAT_CMP_OP(_Tpvec, <, vmflt_vv_##suffix##m1_b##width, suffix, width) \ +OPENCV_HAL_IMPL_RVV_FLOAT_CMP_OP(_Tpvec, >, vmfgt_vv_##suffix##m1_b##width, suffix, width) \ +OPENCV_HAL_IMPL_RVV_FLOAT_CMP_OP(_Tpvec, <=, vmfle_vv_##suffix##m1_b##width, suffix, width) \ +OPENCV_HAL_IMPL_RVV_FLOAT_CMP_OP(_Tpvec, >=, vmfge_vv_##suffix##m1_b##width, suffix, width) + + +OPENCV_HAL_IMPL_RVV_UNSIGNED_CMP(v_uint8x16, u8, 8) +OPENCV_HAL_IMPL_RVV_UNSIGNED_CMP(v_uint16x8, u16, 16) +OPENCV_HAL_IMPL_RVV_UNSIGNED_CMP(v_uint32x4, u32, 32) +OPENCV_HAL_IMPL_RVV_UNSIGNED_CMP(v_uint64x2, u64, 64) +OPENCV_HAL_IMPL_RVV_SIGNED_CMP(v_int8x16, i8, 8) +OPENCV_HAL_IMPL_RVV_SIGNED_CMP(v_int16x8, i16, 16) +OPENCV_HAL_IMPL_RVV_SIGNED_CMP(v_int32x4, i32, 32) +OPENCV_HAL_IMPL_RVV_SIGNED_CMP(v_int64x2, i64, 64) +OPENCV_HAL_IMPL_RVV_FLOAT_CMP(v_float32x4, f32, 32) +#if CV_SIMD128_64F +OPENCV_HAL_IMPL_RVV_FLOAT_CMP(v_float64x2, f64, 64) +#endif + +inline v_float32x4 v_not_nan(const v_float32x4& a) +{ return a == a; } + +#if CV_SIMD128_64F +inline v_float64x2 v_not_nan(const v_float64x2& a) +{ return a == a; } +#endif + +////////////// Min/Max ////////////// + +#define OPENCV_HAL_IMPL_RVV_BIN_FUNC(_Tpvec, func, intrin, width) \ +inline _Tpvec func(const _Tpvec& a, const _Tpvec& b) \ +{ \ + vsetvlmax_e##width##m1(); \ + return _Tpvec(intrin(a, b)); \ +} + +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_uint8x16, v_min, vminu_vv_u8m1, 8) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_uint8x16, v_max, vmaxu_vv_u8m1, 8) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_int8x16, v_min, vmin_vv_i8m1, 8) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_int8x16, v_max, vmax_vv_i8m1, 8) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_uint16x8, v_min, vminu_vv_u16m1, 16) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_uint16x8, v_max, vmaxu_vv_u16m1, 16) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_int16x8, v_min, vmin_vv_i16m1, 16) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_int16x8, v_max, vmax_vv_i16m1, 16) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_uint32x4, v_min, vminu_vv_u32m1, 32) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_uint32x4, v_max, vmaxu_vv_u32m1, 32) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_int32x4, v_min, vmin_vv_i32m1, 32) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_int32x4, v_max, vmax_vv_i32m1, 32) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_float32x4, v_min, vfmin_vv_f32m1, 32) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_float32x4, v_max, vfmax_vv_f32m1, 32) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_uint64x2, v_min, vminu_vv_u64m1, 64) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_uint64x2, v_max, vmaxu_vv_u64m1, 64) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_int64x2, v_min, vmin_vv_i64m1, 64) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_int64x2, v_max, vmax_vv_i64m1, 64) +#if CV_SIMD128_64F +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_float64x2, v_min, vfmin_vv_f64m1, 64) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_float64x2, v_max, vfmax_vv_f64m1, 64) +#endif + +////////////// Arithmetics wrap ////////////// + +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_uint8x16, v_add_wrap, vadd_vv_u8m1, 8) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_int8x16, v_add_wrap, vadd_vv_i8m1, 8) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_uint16x8, v_add_wrap, vadd_vv_u16m1, 16) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_int16x8, v_add_wrap, vadd_vv_i16m1, 16) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_uint8x16, v_sub_wrap, vsub_vv_u8m1, 8) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_int8x16, v_sub_wrap, vsub_vv_i8m1, 8) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_uint16x8, v_sub_wrap, vsub_vv_u16m1, 16) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_int16x8, v_sub_wrap, vsub_vv_i16m1, 16) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_uint8x16, v_mul_wrap, vmul_vv_u8m1, 8) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_int8x16, v_mul_wrap, vmul_vv_i8m1, 8) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_uint16x8, v_mul_wrap, vmul_vv_u16m1, 16) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_int16x8, v_mul_wrap, vmul_vv_i16m1, 16) + +////////////// Reduce ////////////// + +#define OPENCV_HAL_IMPL_RVV_REDUCE_SUM(_Tpvec, _wTpvec, _nwTpvec, scalartype, suffix, wsuffix, wwidth, red) \ +inline scalartype v_reduce_sum(const _Tpvec& a) \ +{ \ + vsetvlmax_e##wwidth##m1(); \ + _nwTpvec zero = vzero_##wsuffix##m1(); \ + _nwTpvec res = vzero_##wsuffix##m1(); \ + res = v##red##_vs_##suffix##m1_##wsuffix##m1(res, a, zero); \ + return (scalartype)(_wTpvec(res).get0()); \ +} + +OPENCV_HAL_IMPL_RVV_REDUCE_SUM(v_uint8x16, v_uint16x8, vuint16m1_t, unsigned, u8, u16, 16, wredsumu) +OPENCV_HAL_IMPL_RVV_REDUCE_SUM(v_int8x16, v_int16x8, vint16m1_t, int, i8, i16, 16, wredsum) +OPENCV_HAL_IMPL_RVV_REDUCE_SUM(v_uint16x8, v_uint32x4, vuint32m1_t, unsigned, u16, u32, 32, wredsumu) +OPENCV_HAL_IMPL_RVV_REDUCE_SUM(v_int16x8, v_int32x4, vint32m1_t, int, i16, i32, 32, wredsum) +OPENCV_HAL_IMPL_RVV_REDUCE_SUM(v_uint32x4, v_uint64x2, vuint64m1_t, unsigned, u32, u64, 64, wredsumu) +OPENCV_HAL_IMPL_RVV_REDUCE_SUM(v_int32x4, v_int64x2, vint64m1_t, int, i32, i64, 64, wredsum) +OPENCV_HAL_IMPL_RVV_REDUCE_SUM(v_float32x4, v_float32x4, vfloat32m1_t, float, f32, f32, 32, fredsum) +OPENCV_HAL_IMPL_RVV_REDUCE_SUM(v_uint64x2, v_uint64x2, vuint64m1_t, uint64, u64, u64, 64, redsum) +OPENCV_HAL_IMPL_RVV_REDUCE_SUM(v_int64x2, v_int64x2, vint64m1_t, int64, i64, i64, 64, redsum) +#if CV_SIMD128_64F +OPENCV_HAL_IMPL_RVV_REDUCE_SUM(v_float64x2, v_float64x2, vfloat64m1_t, double, f64, f64, 64, fredsum) +#endif + + +#define OPENCV_HAL_IMPL_RVV_REDUCE(_Tpvec, func, scalartype, suffix, width, red) \ +inline scalartype v_reduce_##func(const _Tpvec& a) \ +{ \ + vsetvlmax_e##width##m1(); \ + _Tpvec res = _Tpvec(v##red##_vs_##suffix##m1_##suffix##m1(a, a, a)); \ + return scalartype(res.get0()); \ +} + +OPENCV_HAL_IMPL_RVV_REDUCE(v_uint8x16, min, uchar, u8, 8, redminu) +OPENCV_HAL_IMPL_RVV_REDUCE(v_int8x16, min, schar, i8, 8, redmin) +OPENCV_HAL_IMPL_RVV_REDUCE(v_uint16x8, min, ushort, u16, 16, redminu) +OPENCV_HAL_IMPL_RVV_REDUCE(v_int16x8, min, short, i16, 16, redmin) +OPENCV_HAL_IMPL_RVV_REDUCE(v_uint32x4, min, unsigned, u32, 32, redminu) +OPENCV_HAL_IMPL_RVV_REDUCE(v_int32x4, min, int, i32, 32, redmin) +OPENCV_HAL_IMPL_RVV_REDUCE(v_float32x4, min, float, f32, 32, fredmin) +OPENCV_HAL_IMPL_RVV_REDUCE(v_uint8x16, max, uchar, u8, 8, redmaxu) +OPENCV_HAL_IMPL_RVV_REDUCE(v_int8x16, max, schar, i8, 8, redmax) +OPENCV_HAL_IMPL_RVV_REDUCE(v_uint16x8, max, ushort, u16, 16, redmaxu) +OPENCV_HAL_IMPL_RVV_REDUCE(v_int16x8, max, short, i16, 16, redmax) +OPENCV_HAL_IMPL_RVV_REDUCE(v_uint32x4, max, unsigned, u32, 32, redmaxu) +OPENCV_HAL_IMPL_RVV_REDUCE(v_int32x4, max, int, i32, 32, redmax) +OPENCV_HAL_IMPL_RVV_REDUCE(v_float32x4, max, float, f32, 32, fredmax) + + +inline v_float32x4 v_reduce_sum4(const v_float32x4& a, const v_float32x4& b, + const v_float32x4& c, const v_float32x4& d) +{ + float CV_DECL_ALIGNED(32) elems[4] = + { + v_reduce_sum(a), + v_reduce_sum(b), + v_reduce_sum(c), + v_reduce_sum(d) + }; + vsetvlmax_e32m1(); + return v_float32x4(vle32_v_f32m1(elems)); +} + +////////////// Square-Root ////////////// + +inline v_float32x4 v_sqrt(const v_float32x4& x) +{ + vsetvlmax_e32m1(); + return v_float32x4(vfsqrt_v_f32m1(x)); +} + +inline v_float32x4 v_invsqrt(const v_float32x4& x) +{ + v_float32x4 one = v_setall_f32(1.0f); + return one / v_sqrt(x); +} + +#if CV_SIMD128_64F +inline v_float64x2 v_sqrt(const v_float64x2& x) +{ + vsetvlmax_e64m1(); + return v_float64x2(vfsqrt_v_f64m1(x)); +} + +inline v_float64x2 v_invsqrt(const v_float64x2& x) +{ + v_float64x2 one = v_setall_f64(1.0f); + return one / v_sqrt(x); +} +#endif + +inline v_float32x4 v_magnitude(const v_float32x4& a, const v_float32x4& b) +{ + vsetvlmax_e32m1(); + v_float32x4 x(vfmacc_vv_f32m1(vfmul_vv_f32m1(a, a), b, b)); + return v_sqrt(x); +} + +inline v_float32x4 v_sqr_magnitude(const v_float32x4& a, const v_float32x4& b) +{ + vsetvlmax_e32m1(); + return v_float32x4(vfmacc_vv_f32m1(vfmul_vv_f32m1(a, a), b, b)); +} + +#if CV_SIMD128_64F +inline v_float64x2 v_magnitude(const v_float64x2& a, const v_float64x2& b) +{ + vsetvlmax_e64m1(); + v_float64x2 x(vfmacc_vv_f64m1(vfmul_vv_f64m1(a, a), b, b)); + return v_sqrt(x); +} + +inline v_float64x2 v_sqr_magnitude(const v_float64x2& a, const v_float64x2& b) +{ + vsetvlmax_e64m1(); + return v_float64x2(vfmacc_vv_f64m1(vfmul_vv_f64m1(a, a), b, b)); +} +#endif + +////////////// Multiply-Add ////////////// + +inline v_float32x4 v_fma(const v_float32x4& a, const v_float32x4& b, const v_float32x4& c) +{ + vsetvlmax_e32m1(); + return v_float32x4(vfmacc_vv_f32m1(c, a, b)); +} +inline v_int32x4 v_fma(const v_int32x4& a, const v_int32x4& b, const v_int32x4& c) +{ + vsetvlmax_e32m1(); + return v_int32x4(vmacc_vv_i32m1(c, a, b)); +} + +inline v_float32x4 v_muladd(const v_float32x4& a, const v_float32x4& b, const v_float32x4& c) +{ + return v_fma(a, b, c); +} + +inline v_int32x4 v_muladd(const v_int32x4& a, const v_int32x4& b, const v_int32x4& c) +{ + return v_fma(a, b, c); +} + +#if CV_SIMD128_64F +inline v_float64x2 v_fma(const v_float64x2& a, const v_float64x2& b, const v_float64x2& c) +{ + vsetvlmax_e64m1(); + return v_float64x2(vfmacc_vv_f64m1(c, a, b)); +} + +inline v_float64x2 v_muladd(const v_float64x2& a, const v_float64x2& b, const v_float64x2& c) +{ + return v_fma(a, b, c); +} +#endif + +////////////// Check all/any ////////////// + +#define OPENCV_HAL_IMPL_RVV_CHECK_ALLANY(_Tpvec, suffix, shift, width) \ +inline bool v_check_all(const _Tpvec& a) \ +{ \ + vsetvlmax_e##width##m1(); \ + v_uint64x2 v = v_uint64x2((vuint64m1_t)vsrl_vx_##suffix##m1(vnot_v_##suffix##m1(a), shift)); \ + return (v.val[0] | v.val[1]) == 0; \ +} \ +inline bool v_check_any(const _Tpvec& a) \ +{ \ + vsetvlmax_e##width##m1(); \ + v_uint64x2 v = v_uint64x2((vuint64m1_t)vsrl_vx_##suffix##m1(a, shift)); \ + return (v.val[0] | v.val[1]) != 0; \ +} + +OPENCV_HAL_IMPL_RVV_CHECK_ALLANY(v_uint8x16, u8, 7, 8) +OPENCV_HAL_IMPL_RVV_CHECK_ALLANY(v_uint16x8, u16, 15, 16) +OPENCV_HAL_IMPL_RVV_CHECK_ALLANY(v_uint32x4, u32, 31, 32) +OPENCV_HAL_IMPL_RVV_CHECK_ALLANY(v_uint64x2, u64, 63, 64) + + +inline bool v_check_all(const v_int8x16& a) +{ return v_check_all(v_reinterpret_as_u8(a)); } +inline bool v_check_any(const v_int8x16& a) +{ return v_check_any(v_reinterpret_as_u8(a)); } + +inline bool v_check_all(const v_int16x8& a) +{ return v_check_all(v_reinterpret_as_u16(a)); } +inline bool v_check_any(const v_int16x8& a) +{ return v_check_any(v_reinterpret_as_u16(a)); } + +inline bool v_check_all(const v_int32x4& a) +{ return v_check_all(v_reinterpret_as_u32(a)); } +inline bool v_check_any(const v_int32x4& a) +{ return v_check_any(v_reinterpret_as_u32(a)); } + +inline bool v_check_all(const v_float32x4& a) +{ return v_check_all(v_reinterpret_as_u32(a)); } +inline bool v_check_any(const v_float32x4& a) +{ return v_check_any(v_reinterpret_as_u32(a)); } + +inline bool v_check_all(const v_int64x2& a) +{ return v_check_all(v_reinterpret_as_u64(a)); } +inline bool v_check_any(const v_int64x2& a) +{ return v_check_any(v_reinterpret_as_u64(a)); } + +#if CV_SIMD128_64F +inline bool v_check_all(const v_float64x2& a) +{ return v_check_all(v_reinterpret_as_u64(a)); } +inline bool v_check_any(const v_float64x2& a) +{ return v_check_any(v_reinterpret_as_u64(a)); } +#endif + +////////////// abs ////////////// + +#define OPENCV_HAL_IMPL_RVV_ABSDIFF(_Tpvec, abs) \ +inline _Tpvec v_##abs(const _Tpvec& a, const _Tpvec& b) \ +{ \ + return v_max(a, b) - v_min(a, b); \ +} + +OPENCV_HAL_IMPL_RVV_ABSDIFF(v_uint8x16, absdiff) +OPENCV_HAL_IMPL_RVV_ABSDIFF(v_uint16x8, absdiff) +OPENCV_HAL_IMPL_RVV_ABSDIFF(v_uint32x4, absdiff) +OPENCV_HAL_IMPL_RVV_ABSDIFF(v_float32x4, absdiff) +#if CV_SIMD128_64F +OPENCV_HAL_IMPL_RVV_ABSDIFF(v_float64x2, absdiff) +#endif +OPENCV_HAL_IMPL_RVV_ABSDIFF(v_int8x16, absdiffs) +OPENCV_HAL_IMPL_RVV_ABSDIFF(v_int16x8, absdiffs) + +#define OPENCV_HAL_IMPL_RVV_ABSDIFF_S(_Tpvec, _rTpvec, _nwTpvec, sub, rshr, width) \ +inline _rTpvec v_absdiff(const _Tpvec& a, const _Tpvec& b) \ +{ \ + vsetvlmax_e##width##m1(); \ + return _rTpvec(rshr((_nwTpvec)sub(v_max(a, b), v_min(a, b)), 0)); \ +} + +OPENCV_HAL_IMPL_RVV_ABSDIFF_S(v_int8x16, v_uint8x16, vuint16m2_t, vwsub_vv_i16m2, vnclipu_wx_u8m1, 8) +OPENCV_HAL_IMPL_RVV_ABSDIFF_S(v_int16x8, v_uint16x8, vuint32m2_t, vwsub_vv_i32m2, vnclipu_wx_u16m1, 16) +OPENCV_HAL_IMPL_RVV_ABSDIFF_S(v_int32x4, v_uint32x4, vuint64m2_t, vwsub_vv_i64m2, vnclipu_wx_u32m1, 32) + +#define OPENCV_HAL_IMPL_RVV_ABS(_Tprvec, _Tpvec, suffix) \ +inline _Tprvec v_abs(const _Tpvec& a) \ +{ \ + return v_absdiff(a, v_setzero_##suffix()); \ +} + +OPENCV_HAL_IMPL_RVV_ABS(v_uint8x16, v_int8x16, s8) +OPENCV_HAL_IMPL_RVV_ABS(v_uint16x8, v_int16x8, s16) +OPENCV_HAL_IMPL_RVV_ABS(v_uint32x4, v_int32x4, s32) +OPENCV_HAL_IMPL_RVV_ABS(v_float32x4, v_float32x4, f32) +#if CV_SIMD128_64F +OPENCV_HAL_IMPL_RVV_ABS(v_float64x2, v_float64x2, f64) +#endif + + +#define OPENCV_HAL_IMPL_RVV_REDUCE_SAD(_Tpvec, scalartype) \ +inline scalartype v_reduce_sad(const _Tpvec& a, const _Tpvec& b) \ +{ \ + return v_reduce_sum(v_absdiff(a, b)); \ +} + +OPENCV_HAL_IMPL_RVV_REDUCE_SAD(v_uint8x16, unsigned) +OPENCV_HAL_IMPL_RVV_REDUCE_SAD(v_int8x16, unsigned) +OPENCV_HAL_IMPL_RVV_REDUCE_SAD(v_uint16x8, unsigned) +OPENCV_HAL_IMPL_RVV_REDUCE_SAD(v_int16x8, unsigned) +OPENCV_HAL_IMPL_RVV_REDUCE_SAD(v_uint32x4, unsigned) +OPENCV_HAL_IMPL_RVV_REDUCE_SAD(v_int32x4, unsigned) +OPENCV_HAL_IMPL_RVV_REDUCE_SAD(v_float32x4, float) + +////////////// Select ////////////// + +#define OPENCV_HAL_IMPL_RVV_SELECT(_Tpvec, merge, ne, width) \ +inline _Tpvec v_select(const _Tpvec& mask, const _Tpvec& a, const _Tpvec& b) \ +{ \ + vsetvlmax_e##width##m1(); \ + return _Tpvec(merge(ne(mask, 0), b, a)); \ +} + +OPENCV_HAL_IMPL_RVV_SELECT(v_uint8x16, vmerge_vvm_u8m1, vmsne_vx_u8m1_b8, 8) +OPENCV_HAL_IMPL_RVV_SELECT(v_int8x16, vmerge_vvm_i8m1, vmsne_vx_i8m1_b8, 8) +OPENCV_HAL_IMPL_RVV_SELECT(v_uint16x8, vmerge_vvm_u16m1, vmsne_vx_u16m1_b16, 16) +OPENCV_HAL_IMPL_RVV_SELECT(v_int16x8, vmerge_vvm_i16m1, vmsne_vx_i16m1_b16, 16) +OPENCV_HAL_IMPL_RVV_SELECT(v_uint32x4, vmerge_vvm_u32m1, vmsne_vx_u32m1_b32, 32) +OPENCV_HAL_IMPL_RVV_SELECT(v_int32x4, vmerge_vvm_i32m1, vmsne_vx_i32m1_b32, 32) +OPENCV_HAL_IMPL_RVV_SELECT(v_float32x4, vmerge_vvm_f32m1, vmfne_vf_f32m1_b32, 32) +#if CV_SIMD128_64F +OPENCV_HAL_IMPL_RVV_SELECT(v_float64x2, vmerge_vvm_f64m1, vmfne_vf_f64m1_b64, 64) +#endif + +////////////// Rotate shift ////////////// + +#define OPENCV_HAL_IMPL_RVV_ROTATE_OP(_Tpvec, suffix, width) \ +template inline _Tpvec v_rotate_right(const _Tpvec& a) \ +{ \ + vsetvlmax_e##width##m1(); \ + return _Tpvec(vslidedown_vx_##suffix##m1(vzero_##suffix##m1(), a, n)); \ +} \ +template inline _Tpvec v_rotate_left(const _Tpvec& a) \ +{ \ + vsetvlmax_e##width##m1(); \ + return _Tpvec(vslideup_vx_##suffix##m1(vzero_##suffix##m1(), a, n)); \ +} \ +template<> inline _Tpvec v_rotate_left<0>(const _Tpvec& a) \ +{ return a; } \ +template inline _Tpvec v_rotate_right(const _Tpvec& a, const _Tpvec& b) \ +{ \ + vsetvlmax_e##width##m1(); \ + return _Tpvec(vslideup_vx_##suffix##m1(vslidedown_vx_##suffix##m1(vzero_##suffix##m1(), a, n), b, _Tpvec::nlanes - n)); \ +} \ +template inline _Tpvec v_rotate_left(const _Tpvec& a, const _Tpvec& b) \ +{ \ + vsetvlmax_e##width##m1(); \ + return _Tpvec(vslideup_vx_##suffix##m1(vslidedown_vx_##suffix##m1(vzero_##suffix##m1(), b, _Tpvec::nlanes - n), a, n)); \ +} \ +template<> inline _Tpvec v_rotate_left<0>(const _Tpvec& a, const _Tpvec& b) \ +{ CV_UNUSED(b); return a; } + + +OPENCV_HAL_IMPL_RVV_ROTATE_OP(v_uint8x16, u8, 8) +OPENCV_HAL_IMPL_RVV_ROTATE_OP(v_int8x16, i8, 8) +OPENCV_HAL_IMPL_RVV_ROTATE_OP(v_uint16x8, u16, 16) +OPENCV_HAL_IMPL_RVV_ROTATE_OP(v_int16x8, i16, 16) +OPENCV_HAL_IMPL_RVV_ROTATE_OP(v_uint32x4, u32, 32) +OPENCV_HAL_IMPL_RVV_ROTATE_OP(v_int32x4, i32, 32) +OPENCV_HAL_IMPL_RVV_ROTATE_OP(v_float32x4, f32, 32) +OPENCV_HAL_IMPL_RVV_ROTATE_OP(v_uint64x2, u64, 64) +OPENCV_HAL_IMPL_RVV_ROTATE_OP(v_int64x2, i64, 64) +#if CV_SIMD128_64F +OPENCV_HAL_IMPL_RVV_ROTATE_OP(v_float64x2, f64, 64) +#endif + +////////////// Convert to float ////////////// + +inline v_float32x4 v_cvt_f32(const v_int32x4& a) +{ + vsetvlmax_e32m1(); + return v_float32x4(vfcvt_f_x_v_f32m1(a)); +} + +#if CV_SIMD128_64F +inline v_float32x4 v_cvt_f32(const v_float64x2& a) +{ + double arr[4] = {a.val[0], a.val[1], 0, 0}; + vsetvlmax_e64m2(); + vfloat64m2_t tmp = vle64_v_f64m2(arr); + vsetvlmax_e32m1(); + return v_float32x4(vfncvt_f_f_w_f32m1(tmp)); +} + +inline v_float32x4 v_cvt_f32(const v_float64x2& a, const v_float64x2& b) +{ + double arr[4] = {a.val[0], a.val[1], b.val[0], b.val[1]}; + vsetvlmax_e64m2(); + vfloat64m2_t tmp = vle64_v_f64m2(arr); + vsetvlmax_e32m1(); + return v_float32x4(vfncvt_f_f_w_f32m1(tmp)); +} + +inline v_float64x2 v_cvt_f64(const v_int32x4& a) +{ + double CV_DECL_ALIGNED(32) ptr[4] = {0}; + vsetvlmax_e64m2(); + vse64_v_f64m2(ptr, vfwcvt_f_x_v_f64m2(a)); + double CV_DECL_ALIGNED(32) elems[2] = + { + ptr[0], ptr[1] + }; + vsetvlmax_e64m1(); + return v_float64x2(vle64_v_f64m1(elems)); +} + +inline v_float64x2 v_cvt_f64_high(const v_int32x4& a) +{ + double CV_DECL_ALIGNED(32) ptr[4] = {0}; + vsetvlmax_e64m2(); + vse64_v_f64m2(ptr, vfwcvt_f_x_v_f64m2(a)); + double CV_DECL_ALIGNED(32) elems[2] = + { + ptr[2], ptr[3] + }; + vsetvlmax_e64m1(); + return v_float64x2(vle64_v_f64m1(elems)); +} + +inline v_float64x2 v_cvt_f64(const v_float32x4& a) +{ + double CV_DECL_ALIGNED(32) ptr[4] = {0}; + vsetvlmax_e64m2(); + vse64_v_f64m2(ptr, vfwcvt_f_f_v_f64m2(a)); + double CV_DECL_ALIGNED(32) elems[2] = + { + ptr[0], ptr[1] + }; + vsetvlmax_e64m1(); + return v_float64x2(vle64_v_f64m1(elems)); +} + +inline v_float64x2 v_cvt_f64_high(const v_float32x4& a) +{ + double CV_DECL_ALIGNED(32) ptr[4] = {0}; + vsetvlmax_e64m2(); + vse64_v_f64m2(ptr, vfwcvt_f_f_v_f64m2(a)); + double CV_DECL_ALIGNED(32) elems[2] = + { + ptr[2], ptr[3] + }; + vsetvlmax_e64m1(); + return v_float64x2(vle64_v_f64m1(elems)); +} + +inline v_float64x2 v_cvt_f64(const v_int64x2& a) +{ + vsetvlmax_e64m1(); + return v_float64x2(vfcvt_f_x_v_f64m1(a)); +} +#endif + +////////////// Broadcast ////////////// + +#define OPENCV_HAL_IMPL_RVV_BROADCAST(_Tpvec, suffix) \ +template inline _Tpvec v_broadcast_element(_Tpvec v) \ +{ \ + return v_setall_##suffix(v_extract_n(v)); \ +} + +OPENCV_HAL_IMPL_RVV_BROADCAST(v_uint8x16, u8) +OPENCV_HAL_IMPL_RVV_BROADCAST(v_int8x16, s8) +OPENCV_HAL_IMPL_RVV_BROADCAST(v_uint16x8, u16) +OPENCV_HAL_IMPL_RVV_BROADCAST(v_int16x8, s16) +OPENCV_HAL_IMPL_RVV_BROADCAST(v_uint32x4, u32) +OPENCV_HAL_IMPL_RVV_BROADCAST(v_int32x4, s32) +OPENCV_HAL_IMPL_RVV_BROADCAST(v_uint64x2, u64) +OPENCV_HAL_IMPL_RVV_BROADCAST(v_int64x2, s64) +OPENCV_HAL_IMPL_RVV_BROADCAST(v_float32x4, f32) +#if CV_SIMD128_64F +OPENCV_HAL_IMPL_RVV_BROADCAST(v_float64x2, f64) +#endif + +////////////// Transpose4x4 ////////////// + +#define OPENCV_HAL_IMPL_RVV_TRANSPOSE4x4(_Tpvec, _Tp, suffix) \ +inline void v_transpose4x4(const v_##_Tpvec& a0, const v_##_Tpvec& a1, \ + const v_##_Tpvec& a2, const v_##_Tpvec& a3, \ + v_##_Tpvec& b0, v_##_Tpvec& b1, \ + v_##_Tpvec& b2, v_##_Tpvec& b3) \ +{ \ + _Tp CV_DECL_ALIGNED(32) elems0[4] = \ + { \ + v_extract_n<0>(a0), \ + v_extract_n<0>(a1), \ + v_extract_n<0>(a2), \ + v_extract_n<0>(a3) \ + }; \ + b0 = v_load(elems0); \ + _Tp CV_DECL_ALIGNED(32) elems1[4] = \ + { \ + v_extract_n<1>(a0), \ + v_extract_n<1>(a1), \ + v_extract_n<1>(a2), \ + v_extract_n<1>(a3) \ + }; \ + b1 = v_load(elems1); \ + _Tp CV_DECL_ALIGNED(32) elems2[4] = \ + { \ + v_extract_n<2>(a0), \ + v_extract_n<2>(a1), \ + v_extract_n<2>(a2), \ + v_extract_n<2>(a3) \ + }; \ + b2 = v_load(elems2); \ + _Tp CV_DECL_ALIGNED(32) elems3[4] = \ + { \ + v_extract_n<3>(a0), \ + v_extract_n<3>(a1), \ + v_extract_n<3>(a2), \ + v_extract_n<3>(a3) \ + }; \ + b3 = v_load(elems3); \ +} + +OPENCV_HAL_IMPL_RVV_TRANSPOSE4x4(uint32x4, unsigned, u32) +OPENCV_HAL_IMPL_RVV_TRANSPOSE4x4(int32x4, int, i32) +OPENCV_HAL_IMPL_RVV_TRANSPOSE4x4(float32x4, float, f32) + +////////////// Reverse ////////////// + +#define OPENCV_HAL_IMPL_RVV_REVERSE(_Tpvec, _Tp, width, suffix) \ +inline _Tpvec v_reverse(const _Tpvec& a) \ +{ \ + _Tp CV_DECL_ALIGNED(32) ptr[_Tpvec::nlanes] = {0}; \ + _Tp CV_DECL_ALIGNED(32) ptra[_Tpvec::nlanes] = {0}; \ + v_store(ptra, a); \ + for (int i = 0; i < _Tpvec::nlanes; i++) \ + { \ + ptr[i] = ptra[_Tpvec::nlanes-i-1]; \ + } \ + return v_load(ptr); \ +} + +OPENCV_HAL_IMPL_RVV_REVERSE(v_uint8x16, uchar, 8, u8) +OPENCV_HAL_IMPL_RVV_REVERSE(v_int8x16, schar, 8, i8) +OPENCV_HAL_IMPL_RVV_REVERSE(v_uint16x8, ushort, 16, u16) +OPENCV_HAL_IMPL_RVV_REVERSE(v_int16x8, short, 16, i16) +OPENCV_HAL_IMPL_RVV_REVERSE(v_uint32x4, unsigned, 32, u32) +OPENCV_HAL_IMPL_RVV_REVERSE(v_int32x4, int, 32, i32) +OPENCV_HAL_IMPL_RVV_REVERSE(v_float32x4, float, 32, f32) +OPENCV_HAL_IMPL_RVV_REVERSE(v_uint64x2, uint64, 64, u64) +OPENCV_HAL_IMPL_RVV_REVERSE(v_int64x2, int64, 64, i64) +#if CV_SIMD128_64F +OPENCV_HAL_IMPL_RVV_REVERSE(v_float64x2, double, 64, f64) +#endif + +//////////// Value reordering //////////// + +#define OPENCV_HAL_IMPL_RVV_EXPAND(_Tpwvec, _Tp, _Tpvec, width, suffix, wcvt) \ +inline void v_expand(const _Tpvec& a, _Tpwvec& b0, _Tpwvec& b1) \ +{ \ + _Tp CV_DECL_ALIGNED(32) lptr[_Tpvec::nlanes/2] = {0}; \ + _Tp CV_DECL_ALIGNED(32) hptr[_Tpvec::nlanes/2] = {0}; \ + v_store_low(lptr, a); \ + v_store_high(hptr, a); \ + b0 = _Tpwvec(wcvt(vle##width##_v_##suffix##mf2(lptr))); \ + b1 = _Tpwvec(wcvt(vle##width##_v_##suffix##mf2(hptr))); \ +} \ +inline _Tpwvec v_expand_low(const _Tpvec& a) \ +{ \ + _Tp CV_DECL_ALIGNED(32) lptr[_Tpvec::nlanes/2] = {0}; \ + v_store_low(lptr, a); \ + return _Tpwvec(wcvt(vle##width##_v_##suffix##mf2(lptr))); \ +} \ +inline _Tpwvec v_expand_high(const _Tpvec& a) \ +{ \ + _Tp CV_DECL_ALIGNED(32) hptr[_Tpvec::nlanes/2] = {0}; \ + v_store_high(hptr, a); \ + return _Tpwvec(wcvt(vle##width##_v_##suffix##mf2(hptr))); \ +} \ +inline _Tpwvec v_load_expand(const _Tp* ptr) \ +{ \ + return _Tpwvec(wcvt(vle##width##_v_##suffix##mf2(ptr))); \ +} + +OPENCV_HAL_IMPL_RVV_EXPAND(v_uint16x8, uchar, v_uint8x16, 8, u8, vwcvtu_x_x_v_u16m1) +OPENCV_HAL_IMPL_RVV_EXPAND(v_int16x8, schar, v_int8x16, 8, i8, vwcvt_x_x_v_i16m1) +OPENCV_HAL_IMPL_RVV_EXPAND(v_uint32x4, ushort, v_uint16x8, 16, u16, vwcvtu_x_x_v_u32m1) +OPENCV_HAL_IMPL_RVV_EXPAND(v_int32x4, short, v_int16x8, 16, i16, vwcvt_x_x_v_i32m1) +OPENCV_HAL_IMPL_RVV_EXPAND(v_uint64x2, uint, v_uint32x4, 32, u32, vwcvtu_x_x_v_u64m1) +OPENCV_HAL_IMPL_RVV_EXPAND(v_int64x2, int, v_int32x4, 32, i32, vwcvt_x_x_v_i64m1) + +inline v_uint32x4 v_load_expand_q(const uchar* ptr) +{ + vsetvlmax_e32m1(); + return v_uint32x4(vwcvtu_x_x_v_u32m1(vwcvtu_x_x_v_u16mf2(vle8_v_u8mf4(ptr)))); +} + +inline v_int32x4 v_load_expand_q(const schar* ptr) +{ + vsetvlmax_e32m1(); + return v_int32x4(vwcvt_x_x_v_i32m1(vwcvt_x_x_v_i16mf2(vle8_v_i8mf4(ptr)))); +} + + +#define OPENCV_HAL_IMPL_RVV_PACK(_Tpvec, _Tp, _wTpvec, _wTp, width, suffix, rshr, shr) \ +inline _Tpvec v_pack(const _wTpvec& a, const _wTpvec& b) \ +{ \ + _wTp CV_DECL_ALIGNED(32) arr[_Tpvec::nlanes] = {0}; \ + v_store(arr, a); \ + v_store(arr + _wTpvec::nlanes, b); \ + vsetvlmax_e##width##m2(); \ + return _Tpvec(shr(vle##width##_v_##suffix##m2(arr), 0)); \ +} \ +inline void v_pack_store(_Tp* ptr, const _wTpvec& a) \ +{ \ + _wTp CV_DECL_ALIGNED(32) arr[_Tpvec::nlanes] = {0}; \ + v_store(arr, a); \ + v_store(arr + _wTpvec::nlanes, _wTpvec(vzero_##suffix##m1())); \ + vsetvlmax_e##width##m2(); \ + v_store(ptr, _Tpvec(shr(vle##width##_v_##suffix##m2(arr), 0))); \ +} \ +template inline \ +_Tpvec v_rshr_pack(const _wTpvec& a, const _wTpvec& b) \ +{ \ + _wTp CV_DECL_ALIGNED(32) arr[_Tpvec::nlanes] = {0}; \ + v_store(arr, a); \ + v_store(arr + _wTpvec::nlanes, b); \ + vsetvlmax_e##width##m2(); \ + return _Tpvec(rshr(vle##width##_v_##suffix##m2(arr), n)); \ +} \ +template inline \ +void v_rshr_pack_store(_Tp* ptr, const _wTpvec& a) \ +{ \ + _wTp CV_DECL_ALIGNED(32) arr[_Tpvec::nlanes] = {0}; \ + v_store(arr, a); \ + v_store(arr + _wTpvec::nlanes, _wTpvec(vzero_##suffix##m1())); \ + vsetvlmax_e##width##m2(); \ + v_store(ptr, _Tpvec(rshr(vle##width##_v_##suffix##m2(arr), n))); \ +} + +OPENCV_HAL_IMPL_RVV_PACK(v_uint8x16, uchar, v_uint16x8, ushort, 16, u16, vnclipu_wx_u8m1, vnclipu_wx_u8m1) +OPENCV_HAL_IMPL_RVV_PACK(v_int8x16, schar, v_int16x8, short, 16, i16, vnclip_wx_i8m1, vnclip_wx_i8m1) +OPENCV_HAL_IMPL_RVV_PACK(v_uint16x8, ushort, v_uint32x4, unsigned, 32, u32, vnclipu_wx_u16m1, vnclipu_wx_u16m1) +OPENCV_HAL_IMPL_RVV_PACK(v_int16x8, short, v_int32x4, int, 32, i32, vnclip_wx_i16m1, vnclip_wx_i16m1) +OPENCV_HAL_IMPL_RVV_PACK(v_uint32x4, unsigned, v_uint64x2, uint64, 64, u64, vnclipu_wx_u32m1, vnsrl_wx_u32m1) +OPENCV_HAL_IMPL_RVV_PACK(v_int32x4, int, v_int64x2, int64, 64, i64, vnclip_wx_i32m1, vnsra_wx_i32m1) + + +#define OPENCV_HAL_IMPL_RVV_PACK_U(_Tpvec, _Tp, _wTpvec, _wTp, width, suffix, rshr, cast) \ +inline _Tpvec v_pack_u(const _wTpvec& a, const _wTpvec& b) \ +{ \ + _wTp CV_DECL_ALIGNED(32) arr[_Tpvec::nlanes] = {0}; \ + v_store(arr, a); \ + v_store(arr + _wTpvec::nlanes, b); \ + vsetvlmax_e##width##m2(); \ + return _Tpvec(rshr(cast(vmax_vx_##suffix##m2(vle##width##_v_##suffix##m2(arr), 0)), 0)); \ +} \ +inline void v_pack_u_store(_Tp* ptr, const _wTpvec& a) \ +{ \ + _wTp CV_DECL_ALIGNED(32) arr[_Tpvec::nlanes] = {0}; \ + v_store(arr, a); \ + v_store(arr + _wTpvec::nlanes, _wTpvec(vzero_##suffix##m1())); \ + vsetvlmax_e##width##m2(); \ + v_store(ptr, _Tpvec(rshr(cast(vmax_vx_##suffix##m2(vle##width##_v_##suffix##m2(arr), 0)), 0))); \ +} \ +template inline \ +_Tpvec v_rshr_pack_u(const _wTpvec& a, const _wTpvec& b) \ +{ \ + _wTp CV_DECL_ALIGNED(32) arr[_Tpvec::nlanes] = {0}; \ + v_store(arr, a); \ + v_store(arr + _wTpvec::nlanes, b); \ + vsetvlmax_e##width##m2(); \ + return _Tpvec(rshr(cast(vmax_vx_##suffix##m2(vle##width##_v_##suffix##m2(arr), 0)), n)); \ +} \ +template inline \ +void v_rshr_pack_u_store(_Tp* ptr, const _wTpvec& a) \ +{ \ + _wTp CV_DECL_ALIGNED(32) arr[_Tpvec::nlanes] = {0}; \ + v_store(arr, a); \ + v_store(arr + _wTpvec::nlanes, _wTpvec(vzero_##suffix##m1())); \ + vsetvlmax_e##width##m2(); \ + v_store(ptr, _Tpvec(rshr(cast(vmax_vx_##suffix##m2(vle##width##_v_##suffix##m2(arr), 0)), n))); \ +} + +OPENCV_HAL_IMPL_RVV_PACK_U(v_uint8x16, uchar, v_int16x8, short, 16, i16, vnclipu_wx_u8m1, vreinterpret_v_i16m2_u16m2) +OPENCV_HAL_IMPL_RVV_PACK_U(v_uint16x8, ushort, v_int32x4, int, 32, i32, vnclipu_wx_u16m1, vreinterpret_v_i32m2_u32m2) + + +#define OPENCV_HAL_IMPL_RVV_UNPACKS(_Tpvec, _Tp, width, suffix) \ +inline void v_zip(const v_##_Tpvec& a0, const v_##_Tpvec& a1, v_##_Tpvec& b0, v_##_Tpvec& b1) \ +{ \ + _Tp CV_DECL_ALIGNED(32) ptra0[v_##_Tpvec::nlanes] = {0}; \ + _Tp CV_DECL_ALIGNED(32) ptra1[v_##_Tpvec::nlanes] = {0}; \ + _Tp CV_DECL_ALIGNED(32) ptrb0[v_##_Tpvec::nlanes] = {0}; \ + _Tp CV_DECL_ALIGNED(32) ptrb1[v_##_Tpvec::nlanes] = {0}; \ + v_store(ptra0, a0); \ + v_store(ptra1, a1); \ + int i; \ + for( i = 0; i < v_##_Tpvec::nlanes/2; i++ ) \ + { \ + ptrb0[i*2] = ptra0[i]; \ + ptrb0[i*2+1] = ptra1[i]; \ + } \ + for( ; i < v_##_Tpvec::nlanes; i++ ) \ + { \ + ptrb1[i*2-v_##_Tpvec::nlanes] = ptra0[i]; \ + ptrb1[i*2-v_##_Tpvec::nlanes+1] = ptra1[i]; \ + } \ + b0 = v_load(ptrb0); \ + b1 = v_load(ptrb1); \ +} \ +inline v_##_Tpvec v_combine_low(const v_##_Tpvec& a, const v_##_Tpvec& b) \ +{ \ + _Tp CV_DECL_ALIGNED(32) ptra[v_##_Tpvec::nlanes/2] = {0}; \ + _Tp CV_DECL_ALIGNED(32) ptrb[v_##_Tpvec::nlanes/2] = {0}; \ + v_store_low(ptra, a); \ + v_store_low(ptrb, b); \ + return v_load_halves(ptra, ptrb); \ +} \ +inline v_##_Tpvec v_combine_high(const v_##_Tpvec& a, const v_##_Tpvec& b) \ +{ \ + _Tp CV_DECL_ALIGNED(32) ptra[v_##_Tpvec::nlanes/2] = {0}; \ + _Tp CV_DECL_ALIGNED(32) ptrb[v_##_Tpvec::nlanes/2] = {0}; \ + v_store_high(ptra, a); \ + v_store_high(ptrb, b); \ + return v_load_halves(ptra, ptrb); \ +} \ +inline void v_recombine(const v_##_Tpvec& a, const v_##_Tpvec& b, v_##_Tpvec& c, v_##_Tpvec& d) \ +{ \ + c = v_combine_low(a, b); \ + d = v_combine_high(a, b); \ +} + +OPENCV_HAL_IMPL_RVV_UNPACKS(uint8x16, uchar, 8, u8) +OPENCV_HAL_IMPL_RVV_UNPACKS(int8x16, schar, 8, i8) +OPENCV_HAL_IMPL_RVV_UNPACKS(uint16x8, ushort, 16, u16) +OPENCV_HAL_IMPL_RVV_UNPACKS(int16x8, short, 16, i16) +OPENCV_HAL_IMPL_RVV_UNPACKS(uint32x4, unsigned, 32, u32) +OPENCV_HAL_IMPL_RVV_UNPACKS(int32x4, int, 32, i32) +OPENCV_HAL_IMPL_RVV_UNPACKS(float32x4, float, 32, f32) +#if CV_SIMD128_64F +OPENCV_HAL_IMPL_RVV_UNPACKS(float64x2, double, 64, f64) +#endif + + +#define OPENCV_HAL_IMPL_RVV_INTERLEAVED(_Tpvec, _Tp, suffix, width) \ +inline void v_load_deinterleave(const _Tp* ptr, v_##_Tpvec& a, v_##_Tpvec& b) \ +{ \ + _Tp CV_DECL_ALIGNED(32) ptra[v_##_Tpvec::nlanes] = {0}; \ + _Tp CV_DECL_ALIGNED(32) ptrb[v_##_Tpvec::nlanes] = {0}; \ + int i, i2; \ + for( i = i2 = 0; i < v_##_Tpvec::nlanes; i++, i2 += 2 ) \ + { \ + ptra[i] = ptr[i2]; \ + ptrb[i] = ptr[i2+1]; \ + } \ + a = v_load(ptra); \ + b = v_load(ptrb); \ +} \ +inline void v_load_deinterleave(const _Tp* ptr, v_##_Tpvec& a, v_##_Tpvec& b, v_##_Tpvec& c) \ +{ \ + _Tp CV_DECL_ALIGNED(32) ptra[v_##_Tpvec::nlanes] = {0}; \ + _Tp CV_DECL_ALIGNED(32) ptrb[v_##_Tpvec::nlanes] = {0}; \ + _Tp CV_DECL_ALIGNED(32) ptrc[v_##_Tpvec::nlanes] = {0}; \ + int i, i3; \ + for( i = i3 = 0; i < v_##_Tpvec::nlanes; i++, i3 += 3 ) \ + { \ + ptra[i] = ptr[i3]; \ + ptrb[i] = ptr[i3+1]; \ + ptrc[i] = ptr[i3+2]; \ + } \ + a = v_load(ptra); \ + b = v_load(ptrb); \ + c = v_load(ptrc); \ +} \ +inline void v_load_deinterleave(const _Tp* ptr, v_##_Tpvec& a, v_##_Tpvec& b, \ + v_##_Tpvec& c, v_##_Tpvec& d) \ +{ \ + _Tp CV_DECL_ALIGNED(32) ptra[v_##_Tpvec::nlanes] = {0}; \ + _Tp CV_DECL_ALIGNED(32) ptrb[v_##_Tpvec::nlanes] = {0}; \ + _Tp CV_DECL_ALIGNED(32) ptrc[v_##_Tpvec::nlanes] = {0}; \ + _Tp CV_DECL_ALIGNED(32) ptrd[v_##_Tpvec::nlanes] = {0}; \ + int i, i4; \ + for( i = i4 = 0; i < v_##_Tpvec::nlanes; i++, i4 += 4 ) \ + { \ + ptra[i] = ptr[i4]; \ + ptrb[i] = ptr[i4+1]; \ + ptrc[i] = ptr[i4+2]; \ + ptrd[i] = ptr[i4+3]; \ + } \ + a = v_load(ptra); \ + b = v_load(ptrb); \ + c = v_load(ptrc); \ + d = v_load(ptrd); \ +} \ +inline void v_store_interleave( _Tp* ptr, const v_##_Tpvec& a, const v_##_Tpvec& b, \ + hal::StoreMode /*mode*/=hal::STORE_UNALIGNED) \ +{ \ + int i, i2; \ + _Tp CV_DECL_ALIGNED(32) ptra[v_##_Tpvec::nlanes] = {0}; \ + _Tp CV_DECL_ALIGNED(32) ptrb[v_##_Tpvec::nlanes] = {0}; \ + v_store(ptra, a); \ + v_store(ptrb, b); \ + for( i = i2 = 0; i < v_##_Tpvec::nlanes; i++, i2 += 2 ) \ + { \ + ptr[i2] = ptra[i]; \ + ptr[i2+1] = ptrb[i]; \ + } \ +} \ +inline void v_store_interleave( _Tp* ptr, const v_##_Tpvec& a, const v_##_Tpvec& b, \ + const v_##_Tpvec& c, hal::StoreMode /*mode*/=hal::STORE_UNALIGNED) \ +{ \ + int i, i3; \ + _Tp CV_DECL_ALIGNED(32) ptra[v_##_Tpvec::nlanes] = {0}; \ + _Tp CV_DECL_ALIGNED(32) ptrb[v_##_Tpvec::nlanes] = {0}; \ + _Tp CV_DECL_ALIGNED(32) ptrc[v_##_Tpvec::nlanes] = {0}; \ + v_store(ptra, a); \ + v_store(ptrb, b); \ + v_store(ptrc, c); \ + for( i = i3 = 0; i < v_##_Tpvec::nlanes; i++, i3 += 3 ) \ + { \ + ptr[i3] = ptra[i]; \ + ptr[i3+1] = ptrb[i]; \ + ptr[i3+2] = ptrc[i]; \ + } \ +} \ +inline void v_store_interleave( _Tp* ptr, const v_##_Tpvec& a, const v_##_Tpvec& b, \ + const v_##_Tpvec& c, const v_##_Tpvec& d, \ + hal::StoreMode /*mode*/=hal::STORE_UNALIGNED ) \ +{ \ + int i, i4; \ + _Tp CV_DECL_ALIGNED(32) ptra[v_##_Tpvec::nlanes] = {0}; \ + _Tp CV_DECL_ALIGNED(32) ptrb[v_##_Tpvec::nlanes] = {0}; \ + _Tp CV_DECL_ALIGNED(32) ptrc[v_##_Tpvec::nlanes] = {0}; \ + _Tp CV_DECL_ALIGNED(32) ptrd[v_##_Tpvec::nlanes] = {0}; \ + v_store(ptra, a); \ + v_store(ptrb, b); \ + v_store(ptrc, c); \ + v_store(ptrd, d); \ + for( i = i4 = 0; i < v_##_Tpvec::nlanes; i++, i4 += 4 ) \ + { \ + ptr[i4] = ptra[i]; \ + ptr[i4+1] = ptrb[i]; \ + ptr[i4+2] = ptrc[i]; \ + ptr[i4+3] = ptrd[i]; \ + } \ +} \ +inline v_##_Tpvec v_interleave_pairs(const v_##_Tpvec& vec) \ +{ \ + _Tp CV_DECL_ALIGNED(32) ptr[v_##_Tpvec::nlanes] = {0}; \ + _Tp CV_DECL_ALIGNED(32) ptrvec[v_##_Tpvec::nlanes] = {0}; \ + v_store(ptrvec, vec); \ + for (int i = 0; i < v_##_Tpvec::nlanes/4; i++) \ + { \ + ptr[4*i ] = ptrvec[4*i ]; \ + ptr[4*i+1] = ptrvec[4*i+2]; \ + ptr[4*i+2] = ptrvec[4*i+1]; \ + ptr[4*i+3] = ptrvec[4*i+3]; \ + } \ + return v_load(ptr); \ +} \ +inline v_##_Tpvec v_interleave_quads(const v_##_Tpvec& vec) \ +{ \ + _Tp CV_DECL_ALIGNED(32) ptr[v_##_Tpvec::nlanes] = {0}; \ + _Tp CV_DECL_ALIGNED(32) ptrvec[v_##_Tpvec::nlanes] = {0}; \ + v_store(ptrvec, vec); \ + for (int i = 0; i < v_##_Tpvec::nlanes/8; i++) \ + { \ + ptr[8*i ] = ptrvec[4*i ]; \ + ptr[8*i+1] = ptrvec[4*i+4]; \ + ptr[8*i+2] = ptrvec[4*i+1]; \ + ptr[8*i+3] = ptrvec[4*i+5]; \ + ptr[8*i+4] = ptrvec[4*i+2]; \ + ptr[8*i+5] = ptrvec[4*i+6]; \ + ptr[8*i+6] = ptrvec[4*i+3]; \ + ptr[8*i+7] = ptrvec[4*i+7]; \ + } \ + return v_load(ptr); \ +} + +OPENCV_HAL_IMPL_RVV_INTERLEAVED(uint8x16, uchar, u8, 8) +OPENCV_HAL_IMPL_RVV_INTERLEAVED(int8x16, schar, i8, 8) +OPENCV_HAL_IMPL_RVV_INTERLEAVED(uint16x8, ushort, u16, 16) +OPENCV_HAL_IMPL_RVV_INTERLEAVED(int16x8, short, i16, 16) +OPENCV_HAL_IMPL_RVV_INTERLEAVED(uint32x4, unsigned, u32, 32) +OPENCV_HAL_IMPL_RVV_INTERLEAVED(int32x4, int, i32, 32) +OPENCV_HAL_IMPL_RVV_INTERLEAVED(float32x4, float, f32, 32) +OPENCV_HAL_IMPL_RVV_INTERLEAVED(uint64x2, uint64, u64, 64) +OPENCV_HAL_IMPL_RVV_INTERLEAVED(int64x2, int64, i64, 64) +#if CV_SIMD128_64F +OPENCV_HAL_IMPL_RVV_INTERLEAVED(float64x2, double, f64, 64) +#endif + +//////////// PopCount //////////// + +static const unsigned char popCountTable[] = +{ + 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8, +}; + +#define OPENCV_HAL_IMPL_RVV_POPCOUNT_OP(_rTpvec, _Tpvec, _rTp, _Tp, suffix) \ +inline _rTpvec v_popcount(const _Tpvec& a) \ +{ \ + uchar CV_DECL_ALIGNED(32) ptra[16] = {0}; \ + v_store(ptra, v_reinterpret_as_u8(a)); \ + _rTp CV_DECL_ALIGNED(32) ptr[_Tpvec::nlanes] = {0}; \ + v_store(ptr, v_setzero_##suffix()); \ + for (int i = 0; i < _Tpvec::nlanes*(int)sizeof(_Tp); i++) \ + ptr[i/sizeof(_Tp)] += popCountTable[ptra[i]]; \ + return v_load(ptr); \ +} + +OPENCV_HAL_IMPL_RVV_POPCOUNT_OP(v_uint8x16, v_uint8x16, uchar, uchar, u8) +OPENCV_HAL_IMPL_RVV_POPCOUNT_OP(v_uint8x16, v_int8x16, uchar, schar, u8) +OPENCV_HAL_IMPL_RVV_POPCOUNT_OP(v_uint16x8, v_uint16x8, ushort, ushort, u16) +OPENCV_HAL_IMPL_RVV_POPCOUNT_OP(v_uint16x8, v_int16x8, ushort, short, u16) +OPENCV_HAL_IMPL_RVV_POPCOUNT_OP(v_uint32x4, v_uint32x4, unsigned, unsigned, u32) +OPENCV_HAL_IMPL_RVV_POPCOUNT_OP(v_uint32x4, v_int32x4, unsigned, int, u32) +OPENCV_HAL_IMPL_RVV_POPCOUNT_OP(v_uint64x2, v_uint64x2, uint64, uint64, u64) +OPENCV_HAL_IMPL_RVV_POPCOUNT_OP(v_uint64x2, v_int64x2, uint64, int64, u64) + +//////////// SignMask //////////// + +#define OPENCV_HAL_IMPL_RVV_SIGNMASK_OP(_Tpvec, _Tp, suffix, width, shift) \ +inline int v_signmask(const _Tpvec& a) \ +{ \ + int mask = 0; \ + vsetvlmax_e##width##m1(); \ + _Tpvec tmp = _Tpvec(vsrl_vx_##suffix##m1(a, shift)); \ + for( int i = 0; i < _Tpvec::nlanes; i++ ) \ + mask |= (int)(tmp.val[i]) << i; \ + return mask; \ +} + +OPENCV_HAL_IMPL_RVV_SIGNMASK_OP(v_uint8x16, uchar, u8, 8, 7) +OPENCV_HAL_IMPL_RVV_SIGNMASK_OP(v_uint16x8, ushort, u16, 16, 15) +OPENCV_HAL_IMPL_RVV_SIGNMASK_OP(v_uint32x4, unsigned, u32, 32, 31) +OPENCV_HAL_IMPL_RVV_SIGNMASK_OP(v_uint64x2, uint64, u64, 64, 63) + +inline int v_signmask(const v_int8x16& a) +{ return v_signmask(v_reinterpret_as_u8(a)); } +inline int v_signmask(const v_int16x8& a) +{ return v_signmask(v_reinterpret_as_u16(a)); } +inline int v_signmask(const v_int32x4& a) +{ return v_signmask(v_reinterpret_as_u32(a)); } +inline int v_signmask(const v_float32x4& a) +{ return v_signmask(v_reinterpret_as_u32(a)); } +inline int v_signmask(const v_int64x2& a) +{ return v_signmask(v_reinterpret_as_u64(a)); } +#if CV_SIMD128_64F +inline int v_signmask(const v_float64x2& a) +{ return v_signmask(v_reinterpret_as_u64(a)); } +#endif + + +//////////// Scan forward //////////// + +#define OPENCV_HAL_IMPL_RVV_SCAN_FORWOARD_OP(_Tpvec, _Tp, suffix) \ +inline int v_scan_forward(const _Tpvec& a) \ +{ \ + _Tp CV_DECL_ALIGNED(32) ptr[_Tpvec::nlanes] = {0}; \ + v_store(ptr, v_reinterpret_as_##suffix(a)); \ + for (int i = 0; i < _Tpvec::nlanes; i++) \ + if(int(ptr[i]) < 0) \ + return i; \ + return 0; \ +} + +OPENCV_HAL_IMPL_RVV_SCAN_FORWOARD_OP(v_uint8x16, uchar, u8) +OPENCV_HAL_IMPL_RVV_SCAN_FORWOARD_OP(v_int8x16, schar, s8) +OPENCV_HAL_IMPL_RVV_SCAN_FORWOARD_OP(v_uint16x8, ushort, u16) +OPENCV_HAL_IMPL_RVV_SCAN_FORWOARD_OP(v_int16x8, short, s16) +OPENCV_HAL_IMPL_RVV_SCAN_FORWOARD_OP(v_uint32x4, unsigned, u32) +OPENCV_HAL_IMPL_RVV_SCAN_FORWOARD_OP(v_int32x4, int, s32) +OPENCV_HAL_IMPL_RVV_SCAN_FORWOARD_OP(v_float32x4, float, f32) +OPENCV_HAL_IMPL_RVV_SCAN_FORWOARD_OP(v_uint64x2, uint64, u64) +OPENCV_HAL_IMPL_RVV_SCAN_FORWOARD_OP(v_int64x2, int64, s64) +#if CV_SIMD128_64F +OPENCV_HAL_IMPL_RVV_SCAN_FORWOARD_OP(v_float64x2, double, f64) +#endif + +//////////// Pack triplets //////////// + +#define OPENCV_HAL_IMPL_RVV_PACK_TRIPLETS(_Tpvec, _Tp) \ +inline _Tpvec v_pack_triplets(const _Tpvec& vec) \ +{ \ + _Tp CV_DECL_ALIGNED(32) ptr[_Tpvec::nlanes] = {0}; \ + _Tp CV_DECL_ALIGNED(32) ptrvec[_Tpvec::nlanes] = {0}; \ + v_store(ptrvec, vec); \ + for (int i = 0; i < _Tpvec::nlanes/4; i++) \ + { \ + ptr[3*i ] = ptrvec[4*i ]; \ + ptr[3*i+1] = ptrvec[4*i+2]; \ + ptr[3*i+2] = ptrvec[4*i+2]; \ + } \ + return v_load(ptr); \ +} + +OPENCV_HAL_IMPL_RVV_PACK_TRIPLETS(v_uint8x16, uchar) +OPENCV_HAL_IMPL_RVV_PACK_TRIPLETS(v_int8x16, schar) +OPENCV_HAL_IMPL_RVV_PACK_TRIPLETS(v_uint16x8, ushort) +OPENCV_HAL_IMPL_RVV_PACK_TRIPLETS(v_int16x8, short) +OPENCV_HAL_IMPL_RVV_PACK_TRIPLETS(v_uint32x4, unsigned) +OPENCV_HAL_IMPL_RVV_PACK_TRIPLETS(v_int32x4, int) +OPENCV_HAL_IMPL_RVV_PACK_TRIPLETS(v_float32x4, float) + + +////// FP16 support /////// + +#if CV_FP16 +inline v_float32x4 v_load_expand(const float16_t* ptr) +{ + return v_float32x4(vfwcvt_f_f_v_f32m1(vle16_v_f16mf2(ptr))); +} + +inline void v_pack_store(float16_t* ptr, const v_float32x4& v) +{ + vse16_v_f16mf2(ptr, vfncvt_f_f_w_f16mf2(v)); +} +#else +inline v_float32x4 v_load_expand(const float16_t* ptr) +{ + const int N = 4; + float buf[N]; + for( int i = 0; i < N; i++ ) buf[i] = (float)ptr[i]; + return v_load(buf); +} + +inline void v_pack_store(float16_t* ptr, const v_float32x4& v) +{ + const int N = 4; + float buf[N]; + v_store(buf, v); + for( int i = 0; i < N; i++ ) ptr[i] = float16_t(buf[i]); +} +#endif + +////////////// Rounding ////////////// + +inline v_int32x4 v_round(const v_float32x4& a) +{ + vsetvlmax_e32m1(); + return v_int32x4(vfcvt_x_f_v_i32m1(a)); +} + +inline v_int32x4 v_floor(const v_float32x4& a) +{ + v_float32x4 ZP5 = v_setall_f32(0.5f); + v_float32x4 t = a - ZP5; + vsetvlmax_e32m1(); + return v_int32x4(vfcvt_x_f_v_i32m1(t)); +} + +inline v_int32x4 v_ceil(const v_float32x4& a) +{ + v_float32x4 ZP5 = v_setall_f32(0.5f); + v_float32x4 t = a + ZP5; + vsetvlmax_e32m1(); + return v_int32x4(vfcvt_x_f_v_i32m1(t)); +} + +inline v_int32x4 v_trunc(const v_float32x4& a) +{ + vsetvlmax_e32m1(); + return v_int32x4(vfcvt_rtz_x_f_v_i32m1(a)); +} +#if CV_SIMD128_64F +inline v_int32x4 v_round(const v_float64x2& a) +{ + double arr[4] = {a.val[0], a.val[1], 0, 0}; + vsetvlmax_e64m2(); + vfloat64m2_t tmp = vle64_v_f64m2(arr); + return v_int32x4(vfncvt_x_f_w_i32m1(tmp)); +} + +inline v_int32x4 v_round(const v_float64x2& a, const v_float64x2& b) +{ + double arr[4] = {a.val[0], a.val[1], b.val[0], b.val[1]}; + vsetvlmax_e64m2(); + vfloat64m2_t tmp = vle64_v_f64m2(arr); + return v_int32x4(vfncvt_x_f_w_i32m1(tmp)); +} + +inline v_int32x4 v_floor(const v_float64x2& a) +{ + double arr[4] = {a.val[0]-0.5f, a.val[1]-0.5f, 0, 0}; + vsetvlmax_e64m2(); + vfloat64m2_t tmp = vle64_v_f64m2(arr); + return v_int32x4(vfncvt_x_f_w_i32m1(tmp)); +} + +inline v_int32x4 v_ceil(const v_float64x2& a) +{ + double arr[4] = {a.val[0]+0.5f, a.val[1]+0.5f, 0, 0}; + vsetvlmax_e64m2(); + vfloat64m2_t tmp = vle64_v_f64m2(arr); + return v_int32x4(vfncvt_x_f_w_i32m1(tmp)); +} + +inline v_int32x4 v_trunc(const v_float64x2& a) +{ + double arr[4] = {a.val[0], a.val[1], 0, 0}; + vsetvlmax_e64m2(); + vfloat64m2_t tmp = vle64_v_f64m2(arr); + return v_int32x4(vfncvt_rtz_x_f_w_i32m1(tmp)); +} +#endif + + +//////// Dot Product //////// + +// 16 >> 32 +inline v_int32x4 v_dotprod(const v_int16x8& a, const v_int16x8& b) +{ + int CV_DECL_ALIGNED(32) ptr[8] = {0}; + v_int32x4 t1, t2; + vsetvlmax_e32m2(); + vse32_v_i32m2(ptr, vwmul_vv_i32m2(a, b)); + v_load_deinterleave(ptr, t1, t2); + return t1 + t2; +} +inline v_int32x4 v_dotprod(const v_int16x8& a, const v_int16x8& b, const v_int32x4& c) +{ + int CV_DECL_ALIGNED(32) ptr[8] = {0}; + v_int32x4 t1, t2; + vsetvlmax_e32m2(); + vse32_v_i32m2(ptr, vwmul_vv_i32m2(a, b)); + v_load_deinterleave(ptr, t1, t2); + return t1 + t2 + c; +} + +// 32 >> 64 +inline v_int64x2 v_dotprod(const v_int32x4& a, const v_int32x4& b) +{ + int64 CV_DECL_ALIGNED(32) ptr[4] = {0}; + v_int64x2 t1, t2; + vsetvlmax_e64m2(); + vse64_v_i64m2(ptr, vwmul_vv_i64m2(a, b)); + v_load_deinterleave(ptr, t1, t2); + return t1 + t2; +} +inline v_int64x2 v_dotprod(const v_int32x4& a, const v_int32x4& b, const v_int64x2& c) +{ + int64 CV_DECL_ALIGNED(32) ptr[4] = {0}; + v_int64x2 t1, t2; + vsetvlmax_e64m2(); + vse64_v_i64m2(ptr, vwmul_vv_i64m2(a, b)); + v_load_deinterleave(ptr, t1, t2); + return t1 + t2 + c; +} + +// 8 >> 32 +inline v_uint32x4 v_dotprod_expand(const v_uint8x16& a, const v_uint8x16& b) +{ + unsigned CV_DECL_ALIGNED(32) ptr[16] = {0}; + v_uint32x4 t1, t2, t3, t4; + vsetvlmax_e32m4(); + vse32_v_u32m4(ptr, vqmaccu_vv_u32m4(vzero_u32m4(), a, b)); + v_load_deinterleave(ptr, t1, t2, t3, t4); + return t1 + t2 + t3 + t4; +} +inline v_uint32x4 v_dotprod_expand(const v_uint8x16& a, const v_uint8x16& b, + const v_uint32x4& c) +{ + unsigned CV_DECL_ALIGNED(32) ptr[16] = {0}; + v_uint32x4 t1, t2, t3, t4; + vsetvlmax_e32m4(); + vse32_v_u32m4(ptr, vqmaccu_vv_u32m4(vzero_u32m4(), a, b)); + v_load_deinterleave(ptr, t1, t2, t3, t4); + return t1 + t2 + t3 + t4 + c; +} + +inline v_int32x4 v_dotprod_expand(const v_int8x16& a, const v_int8x16& b) +{ + int CV_DECL_ALIGNED(32) ptr[16] = {0}; + v_int32x4 t1, t2, t3, t4; + vsetvlmax_e32m4(); + vse32_v_i32m4(ptr, vqmacc_vv_i32m4(vzero_i32m4(), a, b)); + v_load_deinterleave(ptr, t1, t2, t3, t4); + return t1 + t2 + t3 + t4; +} +inline v_int32x4 v_dotprod_expand(const v_int8x16& a, const v_int8x16& b, + const v_int32x4& c) +{ + int CV_DECL_ALIGNED(32) ptr[16] = {0}; + v_int32x4 t1, t2, t3, t4; + vsetvlmax_e32m4(); + vse32_v_i32m4(ptr, vqmacc_vv_i32m4(vzero_i32m4(), a, b)); + v_load_deinterleave(ptr, t1, t2, t3, t4); + return t1 + t2 + t3 + t4 + c; +} + +// 16 >> 64 +inline v_uint64x2 v_dotprod_expand(const v_uint16x8& a, const v_uint16x8& b) +{ + uint64 CV_DECL_ALIGNED(32) ptr[8] = {0}; + v_uint64x2 t1, t2, t3, t4; + vsetvlmax_e64m4(); + vse64_v_u64m4(ptr, vqmaccu_vv_u64m4(vzero_u64m4(), a, b)); + v_load_deinterleave(ptr, t1, t2, t3, t4); + return t1 + t2 + t3 + t4; +} +inline v_uint64x2 v_dotprod_expand(const v_uint16x8& a, const v_uint16x8& b, const v_uint64x2& c) +{ + uint64 CV_DECL_ALIGNED(32) ptr[8] = {0}; + v_uint64x2 t1, t2, t3, t4; + vsetvlmax_e64m4(); + vse64_v_u64m4(ptr, vqmaccu_vv_u64m4(vzero_u64m4(), a, b)); + v_load_deinterleave(ptr, t1, t2, t3, t4); + return t1 + t2 + t3 + t4 + c; +} + +inline v_int64x2 v_dotprod_expand(const v_int16x8& a, const v_int16x8& b) +{ + int64 CV_DECL_ALIGNED(32) ptr[8] = {0}; + v_int64x2 t1, t2, t3, t4; + vsetvlmax_e64m4(); + vse64_v_i64m4(ptr, vqmacc_vv_i64m4(vzero_i64m4(), a, b)); + v_load_deinterleave(ptr, t1, t2, t3, t4); + return t1 + t2 + t3 + t4; +} +inline v_int64x2 v_dotprod_expand(const v_int16x8& a, const v_int16x8& b, + const v_int64x2& c) +{ + int64 CV_DECL_ALIGNED(32) ptr[8] = {0}; + v_int64x2 t1, t2, t3, t4; + vsetvlmax_e64m4(); + vse64_v_i64m4(ptr, vqmacc_vv_i64m4(vzero_i64m4(), a, b)); + v_load_deinterleave(ptr, t1, t2, t3, t4); + return t1 + t2 + t3 + t4 + c; +} + +// 32 >> 64f +#if CV_SIMD128_64F +inline v_float64x2 v_dotprod_expand(const v_int32x4& a, const v_int32x4& b) +{ return v_cvt_f64(v_dotprod(a, b)); } +inline v_float64x2 v_dotprod_expand(const v_int32x4& a, const v_int32x4& b, + const v_float64x2& c) +{ return v_dotprod_expand(a, b) + c; } +#endif + +//////// Fast Dot Product //////// + +// 16 >> 32 +inline v_int32x4 v_dotprod_fast(const v_int16x8& a, const v_int16x8& b) +{ + int CV_DECL_ALIGNED(32) ptr[8] = {0}; + vsetvlmax_e32m2(); + vse32_v_i32m2(ptr, vwmul_vv_i32m2(a, b)); + v_int32x4 t1 = v_load(ptr); + v_int32x4 t2 = v_load(ptr+4); + return t1 + t2; +} +inline v_int32x4 v_dotprod_fast(const v_int16x8& a, const v_int16x8& b, const v_int32x4& c) +{ + int CV_DECL_ALIGNED(32) ptr[8] = {0}; + vsetvlmax_e32m2(); + vse32_v_i32m2(ptr, vwmul_vv_i32m2(a, b)); + v_int32x4 t1 = v_load(ptr); + v_int32x4 t2 = v_load(ptr+4); + return t1 + t2 + c; +} + +// 32 >> 64 +inline v_int64x2 v_dotprod_fast(const v_int32x4& a, const v_int32x4& b) +{ + int64 CV_DECL_ALIGNED(32) ptr[4] = {0}; + vsetvlmax_e64m2(); + vse64_v_i64m2(ptr, vwmul_vv_i64m2(a, b)); + v_int64x2 t1 = v_load(ptr); + v_int64x2 t2 = v_load(ptr+2); + return t1 + t2; +} +inline v_int64x2 v_dotprod_fast(const v_int32x4& a, const v_int32x4& b, const v_int64x2& c) +{ + int64 CV_DECL_ALIGNED(32) ptr[4] = {0}; + vsetvlmax_e64m2(); + vse64_v_i64m2(ptr, vwmul_vv_i64m2(a, b)); + v_int64x2 t1 = v_load(ptr); + v_int64x2 t2 = v_load(ptr+2); + return t1 + t2 + c; +} + + +// 8 >> 32 +inline v_uint32x4 v_dotprod_expand_fast(const v_uint8x16& a, const v_uint8x16& b) +{ + unsigned CV_DECL_ALIGNED(32) ptr[16] = {0}; + vsetvlmax_e32m4(); + vse32_v_u32m4(ptr, vqmaccu_vv_u32m4(vzero_u32m4(), a, b)); + v_uint32x4 t1 = v_load(ptr); + v_uint32x4 t2 = v_load(ptr+4); + v_uint32x4 t3 = v_load(ptr+8); + v_uint32x4 t4 = v_load(ptr+12); + return t1 + t2 + t3 + t4; +} +inline v_uint32x4 v_dotprod_expand_fast(const v_uint8x16& a, const v_uint8x16& b, const v_uint32x4& c) +{ + unsigned CV_DECL_ALIGNED(32) ptr[16] = {0}; + vsetvlmax_e32m4(); + vse32_v_u32m4(ptr, vqmaccu_vv_u32m4(vzero_u32m4(), a, b)); + v_uint32x4 t1 = v_load(ptr); + v_uint32x4 t2 = v_load(ptr+4); + v_uint32x4 t3 = v_load(ptr+8); + v_uint32x4 t4 = v_load(ptr+12); + return t1 + t2 + t3 + t4 + c; +} +inline v_int32x4 v_dotprod_expand_fast(const v_int8x16& a, const v_int8x16& b) +{ + int CV_DECL_ALIGNED(32) ptr[16] = {0}; + vsetvlmax_e32m4(); + vse32_v_i32m4(ptr, vqmacc_vv_i32m4(vzero_i32m4(), a, b)); + v_int32x4 t1 = v_load(ptr); + v_int32x4 t2 = v_load(ptr+4); + v_int32x4 t3 = v_load(ptr+8); + v_int32x4 t4 = v_load(ptr+12); + return t1 + t2 + t3 + t4; +} +inline v_int32x4 v_dotprod_expand_fast(const v_int8x16& a, const v_int8x16& b, const v_int32x4& c) +{ + int CV_DECL_ALIGNED(32) ptr[16] = {0}; + vsetvlmax_e32m4(); + vse32_v_i32m4(ptr, vqmacc_vv_i32m4(vzero_i32m4(), a, b)); + v_int32x4 t1 = v_load(ptr); + v_int32x4 t2 = v_load(ptr+4); + v_int32x4 t3 = v_load(ptr+8); + v_int32x4 t4 = v_load(ptr+12); + return t1 + t2 + t3 + t4 + c; +} + +// 16 >> 64 +inline v_uint64x2 v_dotprod_expand_fast(const v_uint16x8& a, const v_uint16x8& b) +{ + uint64 CV_DECL_ALIGNED(32) ptr[8] = {0}; + vsetvlmax_e64m4(); + vse64_v_u64m4(ptr, vqmaccu_vv_u64m4(vzero_u64m4(), a, b)); + v_uint64x2 t1 = v_load(ptr); + v_uint64x2 t2 = v_load(ptr+2); + v_uint64x2 t3 = v_load(ptr+4); + v_uint64x2 t4 = v_load(ptr+6); + return t1 + t2 + t3 + t4; +} +inline v_uint64x2 v_dotprod_expand_fast(const v_uint16x8& a, const v_uint16x8& b, const v_uint64x2& c) +{ + uint64 CV_DECL_ALIGNED(32) ptr[8] = {0}; + vsetvlmax_e64m4(); + vse64_v_u64m4(ptr, vqmaccu_vv_u64m4(vzero_u64m4(), a, b)); + v_uint64x2 t1 = v_load(ptr); + v_uint64x2 t2 = v_load(ptr+2); + v_uint64x2 t3 = v_load(ptr+4); + v_uint64x2 t4 = v_load(ptr+6); + return t1 + t2 + t3 + t4 + c; +} +inline v_int64x2 v_dotprod_expand_fast(const v_int16x8& a, const v_int16x8& b) +{ + int64 CV_DECL_ALIGNED(32) ptr[8] = {0}; + vsetvlmax_e64m4(); + vse64_v_i64m4(ptr, vqmacc_vv_i64m4(vzero_i64m4(), a, b)); + v_int64x2 t1 = v_load(ptr); + v_int64x2 t2 = v_load(ptr+2); + v_int64x2 t3 = v_load(ptr+4); + v_int64x2 t4 = v_load(ptr+6); + return t1 + t2 + t3 + t4; +} +inline v_int64x2 v_dotprod_expand_fast(const v_int16x8& a, const v_int16x8& b, const v_int64x2& c) +{ + int64 CV_DECL_ALIGNED(32) ptr[8] = {0}; + vsetvlmax_e64m4(); + vse64_v_i64m4(ptr, vqmacc_vv_i64m4(vzero_i64m4(), a, b)); + v_int64x2 t1 = v_load(ptr); + v_int64x2 t2 = v_load(ptr+2); + v_int64x2 t3 = v_load(ptr+4); + v_int64x2 t4 = v_load(ptr+6); + return t1 + t2 + t3 + t4 + c; +} + +// 32 >> 64f +#if CV_SIMD128_64F +inline v_float64x2 v_dotprod_expand_fast(const v_int32x4& a, const v_int32x4& b) +{ return v_cvt_f64(v_dotprod_fast(a, b)); } +inline v_float64x2 v_dotprod_expand_fast(const v_int32x4& a, const v_int32x4& b, const v_float64x2& c) +{ return v_dotprod_expand_fast(a, b) + c; } +#endif + + +inline v_float32x4 v_matmul(const v_float32x4& v, const v_float32x4& m0, + const v_float32x4& m1, const v_float32x4& m2, + const v_float32x4& m3) +{ + vsetvlmax_e32m1(); + vfloat32m1_t res = vfmul_vf_f32m1(m0, v_extract_n<0>(v)); + res = vfmacc_vf_f32m1(res, v_extract_n<1>(v), m1); + res = vfmacc_vf_f32m1(res, v_extract_n<2>(v), m2); + res = vfmacc_vf_f32m1(res, v_extract_n<3>(v), m3); + return v_float32x4(res); +} + +inline v_float32x4 v_matmuladd(const v_float32x4& v, const v_float32x4& m0, + const v_float32x4& m1, const v_float32x4& m2, + const v_float32x4& a) +{ + vsetvlmax_e32m1(); + vfloat32m1_t res = vfmul_vf_f32m1(m0, v_extract_n<0>(v)); + res = vfmacc_vf_f32m1(res, v_extract_n<1>(v), m1); + res = vfmacc_vf_f32m1(res, v_extract_n<2>(v), m2); + return v_float32x4(res) + a; +} + +#define OPENCV_HAL_IMPL_RVV_MUL_EXPAND(_Tpvec, _Tpwvec, _Tpw, suffix, wmul, width) \ +inline void v_mul_expand(const _Tpvec& a, const _Tpvec& b, _Tpwvec& c, _Tpwvec& d) \ +{ \ + _Tpw CV_DECL_ALIGNED(32) ptr[_Tpwvec::nlanes*2] = {0}; \ + vsetvlmax_e##width##m2(); \ + vse##width##_v_##suffix##m2(ptr, wmul(a, b)); \ + vsetvlmax_e##width##m1(); \ + c = _Tpwvec(vle##width##_v_##suffix##m1(ptr)); \ + d = _Tpwvec(vle##width##_v_##suffix##m1(ptr+_Tpwvec::nlanes)); \ +} + +OPENCV_HAL_IMPL_RVV_MUL_EXPAND(v_uint8x16, v_uint16x8, ushort, u16, vwmulu_vv_u16m2, 16) +OPENCV_HAL_IMPL_RVV_MUL_EXPAND(v_int8x16, v_int16x8, short, i16, vwmul_vv_i16m2, 16) +OPENCV_HAL_IMPL_RVV_MUL_EXPAND(v_uint16x8, v_uint32x4, unsigned, u32, vwmulu_vv_u32m2, 32) +OPENCV_HAL_IMPL_RVV_MUL_EXPAND(v_int16x8, v_int32x4, int, i32, vwmul_vv_i32m2, 32) +OPENCV_HAL_IMPL_RVV_MUL_EXPAND(v_uint32x4, v_uint64x2, uint64, u64, vwmulu_vv_u64m2, 64) + + +inline v_int16x8 v_mul_hi(const v_int16x8& a, const v_int16x8& b) +{ + vsetvlmax_e16m1(); + return v_int16x8(vnsra_wx_i16m1(vwmul_vv_i32m2(a, b), 16)); +} +inline v_uint16x8 v_mul_hi(const v_uint16x8& a, const v_uint16x8& b) +{ + vsetvlmax_e16m1(); + return v_uint16x8(vnsrl_wx_u16m1(vwmulu_vv_u32m2(a, b), 16)); +} + + +//////// Saturating Multiply //////// + +#define OPENCV_HAL_IMPL_RVV_MUL_SAT(_Tpvec, _wTpvec) \ +inline _Tpvec operator * (const _Tpvec& a, const _Tpvec& b) \ +{ \ + _wTpvec c, d; \ + v_mul_expand(a, b, c, d); \ + return v_pack(c, d); \ +} \ +inline _Tpvec& operator *= (_Tpvec& a, const _Tpvec& b) \ +{ \ + a = a * b; \ + return a; \ +} + +OPENCV_HAL_IMPL_RVV_MUL_SAT(v_uint8x16, v_uint16x8) +OPENCV_HAL_IMPL_RVV_MUL_SAT(v_int8x16, v_int16x8) +OPENCV_HAL_IMPL_RVV_MUL_SAT(v_uint16x8, v_uint32x4) +OPENCV_HAL_IMPL_RVV_MUL_SAT(v_int16x8, v_int32x4) + + +inline void v_cleanup() {} + +CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END + + +} + +#endif diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/hal/intrin_sse.hpp b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/hal/intrin_sse.hpp new file mode 100644 index 0000000..f4b43a2 --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/hal/intrin_sse.hpp @@ -0,0 +1,3455 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Copyright (C) 2013, OpenCV Foundation, all rights reserved. +// Copyright (C) 2015, Itseez Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef OPENCV_HAL_SSE_HPP +#define OPENCV_HAL_SSE_HPP + +#include +#include "opencv2/core/utility.hpp" + +#define CV_SIMD128 1 +#define CV_SIMD128_64F 1 +#define CV_SIMD128_FP16 0 // no native operations with FP16 type. + +namespace cv +{ + +//! @cond IGNORED + +// +// Compilation troubleshooting: +// - MSVC: error C2719: 'a': formal parameter with requested alignment of 16 won't be aligned +// Replace parameter declaration to const reference: +// -v_int32x4 a +// +const v_int32x4& a +// + +CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN + +///////// Types //////////// + +struct v_uint8x16 +{ + typedef uchar lane_type; + typedef __m128i vector_type; + enum { nlanes = 16 }; + + /* coverity[uninit_ctor]: suppress warning */ + v_uint8x16() {} + explicit v_uint8x16(__m128i v) : val(v) {} + v_uint8x16(uchar v0, uchar v1, uchar v2, uchar v3, uchar v4, uchar v5, uchar v6, uchar v7, + uchar v8, uchar v9, uchar v10, uchar v11, uchar v12, uchar v13, uchar v14, uchar v15) + { + val = _mm_setr_epi8((char)v0, (char)v1, (char)v2, (char)v3, + (char)v4, (char)v5, (char)v6, (char)v7, + (char)v8, (char)v9, (char)v10, (char)v11, + (char)v12, (char)v13, (char)v14, (char)v15); + } + + uchar get0() const + { + return (uchar)_mm_cvtsi128_si32(val); + } + + __m128i val; +}; + +struct v_int8x16 +{ + typedef schar lane_type; + typedef __m128i vector_type; + enum { nlanes = 16 }; + + /* coverity[uninit_ctor]: suppress warning */ + v_int8x16() {} + explicit v_int8x16(__m128i v) : val(v) {} + v_int8x16(schar v0, schar v1, schar v2, schar v3, schar v4, schar v5, schar v6, schar v7, + schar v8, schar v9, schar v10, schar v11, schar v12, schar v13, schar v14, schar v15) + { + val = _mm_setr_epi8((char)v0, (char)v1, (char)v2, (char)v3, + (char)v4, (char)v5, (char)v6, (char)v7, + (char)v8, (char)v9, (char)v10, (char)v11, + (char)v12, (char)v13, (char)v14, (char)v15); + } + + schar get0() const + { + return (schar)_mm_cvtsi128_si32(val); + } + + __m128i val; +}; + +struct v_uint16x8 +{ + typedef ushort lane_type; + typedef __m128i vector_type; + enum { nlanes = 8 }; + + /* coverity[uninit_ctor]: suppress warning */ + v_uint16x8() {} + explicit v_uint16x8(__m128i v) : val(v) {} + v_uint16x8(ushort v0, ushort v1, ushort v2, ushort v3, ushort v4, ushort v5, ushort v6, ushort v7) + { + val = _mm_setr_epi16((short)v0, (short)v1, (short)v2, (short)v3, + (short)v4, (short)v5, (short)v6, (short)v7); + } + + ushort get0() const + { + return (ushort)_mm_cvtsi128_si32(val); + } + + __m128i val; +}; + +struct v_int16x8 +{ + typedef short lane_type; + typedef __m128i vector_type; + enum { nlanes = 8 }; + + /* coverity[uninit_ctor]: suppress warning */ + v_int16x8() {} + explicit v_int16x8(__m128i v) : val(v) {} + v_int16x8(short v0, short v1, short v2, short v3, short v4, short v5, short v6, short v7) + { + val = _mm_setr_epi16((short)v0, (short)v1, (short)v2, (short)v3, + (short)v4, (short)v5, (short)v6, (short)v7); + } + + short get0() const + { + return (short)_mm_cvtsi128_si32(val); + } + + __m128i val; +}; + +struct v_uint32x4 +{ + typedef unsigned lane_type; + typedef __m128i vector_type; + enum { nlanes = 4 }; + + /* coverity[uninit_ctor]: suppress warning */ + v_uint32x4() {} + explicit v_uint32x4(__m128i v) : val(v) {} + v_uint32x4(unsigned v0, unsigned v1, unsigned v2, unsigned v3) + { + val = _mm_setr_epi32((int)v0, (int)v1, (int)v2, (int)v3); + } + + unsigned get0() const + { + return (unsigned)_mm_cvtsi128_si32(val); + } + + __m128i val; +}; + +struct v_int32x4 +{ + typedef int lane_type; + typedef __m128i vector_type; + enum { nlanes = 4 }; + + /* coverity[uninit_ctor]: suppress warning */ + v_int32x4() {} + explicit v_int32x4(__m128i v) : val(v) {} + v_int32x4(int v0, int v1, int v2, int v3) + { + val = _mm_setr_epi32(v0, v1, v2, v3); + } + + int get0() const + { + return _mm_cvtsi128_si32(val); + } + + __m128i val; +}; + +struct v_float32x4 +{ + typedef float lane_type; + typedef __m128 vector_type; + enum { nlanes = 4 }; + + /* coverity[uninit_ctor]: suppress warning */ + v_float32x4() {} + explicit v_float32x4(__m128 v) : val(v) {} + v_float32x4(float v0, float v1, float v2, float v3) + { + val = _mm_setr_ps(v0, v1, v2, v3); + } + + float get0() const + { + return _mm_cvtss_f32(val); + } + + __m128 val; +}; + +struct v_uint64x2 +{ + typedef uint64 lane_type; + typedef __m128i vector_type; + enum { nlanes = 2 }; + + /* coverity[uninit_ctor]: suppress warning */ + v_uint64x2() {} + explicit v_uint64x2(__m128i v) : val(v) {} + v_uint64x2(uint64 v0, uint64 v1) + { + val = _mm_setr_epi32((int)v0, (int)(v0 >> 32), (int)v1, (int)(v1 >> 32)); + } + + uint64 get0() const + { + #if !defined(__x86_64__) && !defined(_M_X64) + int a = _mm_cvtsi128_si32(val); + int b = _mm_cvtsi128_si32(_mm_srli_epi64(val, 32)); + return (unsigned)a | ((uint64)(unsigned)b << 32); + #else + return (uint64)_mm_cvtsi128_si64(val); + #endif + } + + __m128i val; +}; + +struct v_int64x2 +{ + typedef int64 lane_type; + typedef __m128i vector_type; + enum { nlanes = 2 }; + + /* coverity[uninit_ctor]: suppress warning */ + v_int64x2() {} + explicit v_int64x2(__m128i v) : val(v) {} + v_int64x2(int64 v0, int64 v1) + { + val = _mm_setr_epi32((int)v0, (int)(v0 >> 32), (int)v1, (int)(v1 >> 32)); + } + + int64 get0() const + { + #if !defined(__x86_64__) && !defined(_M_X64) + int a = _mm_cvtsi128_si32(val); + int b = _mm_cvtsi128_si32(_mm_srli_epi64(val, 32)); + return (int64)((unsigned)a | ((uint64)(unsigned)b << 32)); + #else + return _mm_cvtsi128_si64(val); + #endif + } + + __m128i val; +}; + +struct v_float64x2 +{ + typedef double lane_type; + typedef __m128d vector_type; + enum { nlanes = 2 }; + + /* coverity[uninit_ctor]: suppress warning */ + v_float64x2() {} + explicit v_float64x2(__m128d v) : val(v) {} + v_float64x2(double v0, double v1) + { + val = _mm_setr_pd(v0, v1); + } + + double get0() const + { + return _mm_cvtsd_f64(val); + } + + __m128d val; +}; + +namespace hal_sse_internal +{ + template + to_sse_type v_sse_reinterpret_as(const from_sse_type& val); + +#define OPENCV_HAL_IMPL_SSE_REINTERPRET_RAW(to_sse_type, from_sse_type, sse_cast_intrin) \ + template<> inline \ + to_sse_type v_sse_reinterpret_as(const from_sse_type& a) \ + { return sse_cast_intrin(a); } + + OPENCV_HAL_IMPL_SSE_REINTERPRET_RAW(__m128i, __m128i, OPENCV_HAL_NOP) + OPENCV_HAL_IMPL_SSE_REINTERPRET_RAW(__m128i, __m128, _mm_castps_si128) + OPENCV_HAL_IMPL_SSE_REINTERPRET_RAW(__m128i, __m128d, _mm_castpd_si128) + OPENCV_HAL_IMPL_SSE_REINTERPRET_RAW(__m128, __m128i, _mm_castsi128_ps) + OPENCV_HAL_IMPL_SSE_REINTERPRET_RAW(__m128, __m128, OPENCV_HAL_NOP) + OPENCV_HAL_IMPL_SSE_REINTERPRET_RAW(__m128, __m128d, _mm_castpd_ps) + OPENCV_HAL_IMPL_SSE_REINTERPRET_RAW(__m128d, __m128i, _mm_castsi128_pd) + OPENCV_HAL_IMPL_SSE_REINTERPRET_RAW(__m128d, __m128, _mm_castps_pd) + OPENCV_HAL_IMPL_SSE_REINTERPRET_RAW(__m128d, __m128d, OPENCV_HAL_NOP) +} + +#define OPENCV_HAL_IMPL_SSE_INITVEC(_Tpvec, _Tp, suffix, zsuffix, ssuffix, _Tps, cast) \ +inline _Tpvec v_setzero_##suffix() { return _Tpvec(_mm_setzero_##zsuffix()); } \ +inline _Tpvec v_setall_##suffix(_Tp v) { return _Tpvec(_mm_set1_##ssuffix((_Tps)v)); } \ +template inline _Tpvec v_reinterpret_as_##suffix(const _Tpvec0& a) \ +{ return _Tpvec(cast(a.val)); } + +OPENCV_HAL_IMPL_SSE_INITVEC(v_uint8x16, uchar, u8, si128, epi8, schar, OPENCV_HAL_NOP) +OPENCV_HAL_IMPL_SSE_INITVEC(v_int8x16, schar, s8, si128, epi8, schar, OPENCV_HAL_NOP) +OPENCV_HAL_IMPL_SSE_INITVEC(v_uint16x8, ushort, u16, si128, epi16, short, OPENCV_HAL_NOP) +OPENCV_HAL_IMPL_SSE_INITVEC(v_int16x8, short, s16, si128, epi16, short, OPENCV_HAL_NOP) +OPENCV_HAL_IMPL_SSE_INITVEC(v_uint32x4, unsigned, u32, si128, epi32, int, OPENCV_HAL_NOP) +OPENCV_HAL_IMPL_SSE_INITVEC(v_int32x4, int, s32, si128, epi32, int, OPENCV_HAL_NOP) +OPENCV_HAL_IMPL_SSE_INITVEC(v_float32x4, float, f32, ps, ps, float, _mm_castsi128_ps) +OPENCV_HAL_IMPL_SSE_INITVEC(v_float64x2, double, f64, pd, pd, double, _mm_castsi128_pd) + +inline v_uint64x2 v_setzero_u64() { return v_uint64x2(_mm_setzero_si128()); } +inline v_int64x2 v_setzero_s64() { return v_int64x2(_mm_setzero_si128()); } +inline v_uint64x2 v_setall_u64(uint64 val) { return v_uint64x2(val, val); } +inline v_int64x2 v_setall_s64(int64 val) { return v_int64x2(val, val); } + +template inline +v_uint64x2 v_reinterpret_as_u64(const _Tpvec& a) { return v_uint64x2(a.val); } +template inline +v_int64x2 v_reinterpret_as_s64(const _Tpvec& a) { return v_int64x2(a.val); } +inline v_float32x4 v_reinterpret_as_f32(const v_uint64x2& a) +{ return v_float32x4(_mm_castsi128_ps(a.val)); } +inline v_float32x4 v_reinterpret_as_f32(const v_int64x2& a) +{ return v_float32x4(_mm_castsi128_ps(a.val)); } +inline v_float64x2 v_reinterpret_as_f64(const v_uint64x2& a) +{ return v_float64x2(_mm_castsi128_pd(a.val)); } +inline v_float64x2 v_reinterpret_as_f64(const v_int64x2& a) +{ return v_float64x2(_mm_castsi128_pd(a.val)); } + +#define OPENCV_HAL_IMPL_SSE_INIT_FROM_FLT(_Tpvec, suffix) \ +inline _Tpvec v_reinterpret_as_##suffix(const v_float32x4& a) \ +{ return _Tpvec(_mm_castps_si128(a.val)); } \ +inline _Tpvec v_reinterpret_as_##suffix(const v_float64x2& a) \ +{ return _Tpvec(_mm_castpd_si128(a.val)); } + +OPENCV_HAL_IMPL_SSE_INIT_FROM_FLT(v_uint8x16, u8) +OPENCV_HAL_IMPL_SSE_INIT_FROM_FLT(v_int8x16, s8) +OPENCV_HAL_IMPL_SSE_INIT_FROM_FLT(v_uint16x8, u16) +OPENCV_HAL_IMPL_SSE_INIT_FROM_FLT(v_int16x8, s16) +OPENCV_HAL_IMPL_SSE_INIT_FROM_FLT(v_uint32x4, u32) +OPENCV_HAL_IMPL_SSE_INIT_FROM_FLT(v_int32x4, s32) +OPENCV_HAL_IMPL_SSE_INIT_FROM_FLT(v_uint64x2, u64) +OPENCV_HAL_IMPL_SSE_INIT_FROM_FLT(v_int64x2, s64) + +inline v_float32x4 v_reinterpret_as_f32(const v_float32x4& a) {return a; } +inline v_float64x2 v_reinterpret_as_f64(const v_float64x2& a) {return a; } +inline v_float32x4 v_reinterpret_as_f32(const v_float64x2& a) {return v_float32x4(_mm_castpd_ps(a.val)); } +inline v_float64x2 v_reinterpret_as_f64(const v_float32x4& a) {return v_float64x2(_mm_castps_pd(a.val)); } + +//////////////// PACK /////////////// +inline v_uint8x16 v_pack(const v_uint16x8& a, const v_uint16x8& b) +{ + __m128i delta = _mm_set1_epi16(255); + return v_uint8x16(_mm_packus_epi16(_mm_subs_epu16(a.val, _mm_subs_epu16(a.val, delta)), + _mm_subs_epu16(b.val, _mm_subs_epu16(b.val, delta)))); +} + +inline void v_pack_store(uchar* ptr, const v_uint16x8& a) +{ + __m128i delta = _mm_set1_epi16(255); + __m128i a1 = _mm_subs_epu16(a.val, _mm_subs_epu16(a.val, delta)); + _mm_storel_epi64((__m128i*)ptr, _mm_packus_epi16(a1, a1)); +} + +inline v_uint8x16 v_pack_u(const v_int16x8& a, const v_int16x8& b) +{ return v_uint8x16(_mm_packus_epi16(a.val, b.val)); } + +inline void v_pack_u_store(uchar* ptr, const v_int16x8& a) +{ _mm_storel_epi64((__m128i*)ptr, _mm_packus_epi16(a.val, a.val)); } + +template inline +v_uint8x16 v_rshr_pack(const v_uint16x8& a, const v_uint16x8& b) +{ + // we assume that n > 0, and so the shifted 16-bit values can be treated as signed numbers. + __m128i delta = _mm_set1_epi16((short)(1 << (n-1))); + return v_uint8x16(_mm_packus_epi16(_mm_srli_epi16(_mm_adds_epu16(a.val, delta), n), + _mm_srli_epi16(_mm_adds_epu16(b.val, delta), n))); +} + +template inline +void v_rshr_pack_store(uchar* ptr, const v_uint16x8& a) +{ + __m128i delta = _mm_set1_epi16((short)(1 << (n-1))); + __m128i a1 = _mm_srli_epi16(_mm_adds_epu16(a.val, delta), n); + _mm_storel_epi64((__m128i*)ptr, _mm_packus_epi16(a1, a1)); +} + +template inline +v_uint8x16 v_rshr_pack_u(const v_int16x8& a, const v_int16x8& b) +{ + __m128i delta = _mm_set1_epi16((short)(1 << (n-1))); + return v_uint8x16(_mm_packus_epi16(_mm_srai_epi16(_mm_adds_epi16(a.val, delta), n), + _mm_srai_epi16(_mm_adds_epi16(b.val, delta), n))); +} + +template inline +void v_rshr_pack_u_store(uchar* ptr, const v_int16x8& a) +{ + __m128i delta = _mm_set1_epi16((short)(1 << (n-1))); + __m128i a1 = _mm_srai_epi16(_mm_adds_epi16(a.val, delta), n); + _mm_storel_epi64((__m128i*)ptr, _mm_packus_epi16(a1, a1)); +} + +inline v_int8x16 v_pack(const v_int16x8& a, const v_int16x8& b) +{ return v_int8x16(_mm_packs_epi16(a.val, b.val)); } + +inline void v_pack_store(schar* ptr, const v_int16x8& a) +{ _mm_storel_epi64((__m128i*)ptr, _mm_packs_epi16(a.val, a.val)); } + +template inline +v_int8x16 v_rshr_pack(const v_int16x8& a, const v_int16x8& b) +{ + // we assume that n > 0, and so the shifted 16-bit values can be treated as signed numbers. + __m128i delta = _mm_set1_epi16((short)(1 << (n-1))); + return v_int8x16(_mm_packs_epi16(_mm_srai_epi16(_mm_adds_epi16(a.val, delta), n), + _mm_srai_epi16(_mm_adds_epi16(b.val, delta), n))); +} +template inline +void v_rshr_pack_store(schar* ptr, const v_int16x8& a) +{ + // we assume that n > 0, and so the shifted 16-bit values can be treated as signed numbers. + __m128i delta = _mm_set1_epi16((short)(1 << (n-1))); + __m128i a1 = _mm_srai_epi16(_mm_adds_epi16(a.val, delta), n); + _mm_storel_epi64((__m128i*)ptr, _mm_packs_epi16(a1, a1)); +} + + +// byte-wise "mask ? a : b" +inline __m128i v_select_si128(__m128i mask, __m128i a, __m128i b) +{ +#if CV_SSE4_1 + return _mm_blendv_epi8(b, a, mask); +#else + return _mm_xor_si128(b, _mm_and_si128(_mm_xor_si128(a, b), mask)); +#endif +} + +inline v_uint16x8 v_pack(const v_uint32x4& a, const v_uint32x4& b) +{ return v_uint16x8(_v128_packs_epu32(a.val, b.val)); } + +inline void v_pack_store(ushort* ptr, const v_uint32x4& a) +{ + __m128i z = _mm_setzero_si128(), maxval32 = _mm_set1_epi32(65535), delta32 = _mm_set1_epi32(32768); + __m128i a1 = _mm_sub_epi32(v_select_si128(_mm_cmpgt_epi32(z, a.val), maxval32, a.val), delta32); + __m128i r = _mm_packs_epi32(a1, a1); + _mm_storel_epi64((__m128i*)ptr, _mm_sub_epi16(r, _mm_set1_epi16(-32768))); +} + +template inline +v_uint16x8 v_rshr_pack(const v_uint32x4& a, const v_uint32x4& b) +{ + __m128i delta = _mm_set1_epi32(1 << (n-1)), delta32 = _mm_set1_epi32(32768); + __m128i a1 = _mm_sub_epi32(_mm_srli_epi32(_mm_add_epi32(a.val, delta), n), delta32); + __m128i b1 = _mm_sub_epi32(_mm_srli_epi32(_mm_add_epi32(b.val, delta), n), delta32); + return v_uint16x8(_mm_sub_epi16(_mm_packs_epi32(a1, b1), _mm_set1_epi16(-32768))); +} + +template inline +void v_rshr_pack_store(ushort* ptr, const v_uint32x4& a) +{ + __m128i delta = _mm_set1_epi32(1 << (n-1)), delta32 = _mm_set1_epi32(32768); + __m128i a1 = _mm_sub_epi32(_mm_srli_epi32(_mm_add_epi32(a.val, delta), n), delta32); + __m128i a2 = _mm_sub_epi16(_mm_packs_epi32(a1, a1), _mm_set1_epi16(-32768)); + _mm_storel_epi64((__m128i*)ptr, a2); +} + +inline v_uint16x8 v_pack_u(const v_int32x4& a, const v_int32x4& b) +{ +#if CV_SSE4_1 + return v_uint16x8(_mm_packus_epi32(a.val, b.val)); +#else + __m128i delta32 = _mm_set1_epi32(32768); + + // preliminary saturate negative values to zero + __m128i a1 = _mm_and_si128(a.val, _mm_cmpgt_epi32(a.val, _mm_set1_epi32(0))); + __m128i b1 = _mm_and_si128(b.val, _mm_cmpgt_epi32(b.val, _mm_set1_epi32(0))); + + __m128i r = _mm_packs_epi32(_mm_sub_epi32(a1, delta32), _mm_sub_epi32(b1, delta32)); + return v_uint16x8(_mm_sub_epi16(r, _mm_set1_epi16(-32768))); +#endif +} + +inline void v_pack_u_store(ushort* ptr, const v_int32x4& a) +{ +#if CV_SSE4_1 + _mm_storel_epi64((__m128i*)ptr, _mm_packus_epi32(a.val, a.val)); +#else + __m128i delta32 = _mm_set1_epi32(32768); + __m128i a1 = _mm_sub_epi32(a.val, delta32); + __m128i r = _mm_sub_epi16(_mm_packs_epi32(a1, a1), _mm_set1_epi16(-32768)); + _mm_storel_epi64((__m128i*)ptr, r); +#endif +} + +template inline +v_uint16x8 v_rshr_pack_u(const v_int32x4& a, const v_int32x4& b) +{ +#if CV_SSE4_1 + __m128i delta = _mm_set1_epi32(1 << (n - 1)); + return v_uint16x8(_mm_packus_epi32(_mm_srai_epi32(_mm_add_epi32(a.val, delta), n), + _mm_srai_epi32(_mm_add_epi32(b.val, delta), n))); +#else + __m128i delta = _mm_set1_epi32(1 << (n-1)), delta32 = _mm_set1_epi32(32768); + __m128i a1 = _mm_sub_epi32(_mm_srai_epi32(_mm_add_epi32(a.val, delta), n), delta32); + __m128i a2 = _mm_sub_epi16(_mm_packs_epi32(a1, a1), _mm_set1_epi16(-32768)); + __m128i b1 = _mm_sub_epi32(_mm_srai_epi32(_mm_add_epi32(b.val, delta), n), delta32); + __m128i b2 = _mm_sub_epi16(_mm_packs_epi32(b1, b1), _mm_set1_epi16(-32768)); + return v_uint16x8(_mm_unpacklo_epi64(a2, b2)); +#endif +} + +template inline +void v_rshr_pack_u_store(ushort* ptr, const v_int32x4& a) +{ +#if CV_SSE4_1 + __m128i delta = _mm_set1_epi32(1 << (n - 1)); + __m128i a1 = _mm_srai_epi32(_mm_add_epi32(a.val, delta), n); + _mm_storel_epi64((__m128i*)ptr, _mm_packus_epi32(a1, a1)); +#else + __m128i delta = _mm_set1_epi32(1 << (n-1)), delta32 = _mm_set1_epi32(32768); + __m128i a1 = _mm_sub_epi32(_mm_srai_epi32(_mm_add_epi32(a.val, delta), n), delta32); + __m128i a2 = _mm_sub_epi16(_mm_packs_epi32(a1, a1), _mm_set1_epi16(-32768)); + _mm_storel_epi64((__m128i*)ptr, a2); +#endif +} + +inline v_int16x8 v_pack(const v_int32x4& a, const v_int32x4& b) +{ return v_int16x8(_mm_packs_epi32(a.val, b.val)); } + +inline void v_pack_store(short* ptr, const v_int32x4& a) +{ + _mm_storel_epi64((__m128i*)ptr, _mm_packs_epi32(a.val, a.val)); +} + +template inline +v_int16x8 v_rshr_pack(const v_int32x4& a, const v_int32x4& b) +{ + __m128i delta = _mm_set1_epi32(1 << (n-1)); + return v_int16x8(_mm_packs_epi32(_mm_srai_epi32(_mm_add_epi32(a.val, delta), n), + _mm_srai_epi32(_mm_add_epi32(b.val, delta), n))); +} + +template inline +void v_rshr_pack_store(short* ptr, const v_int32x4& a) +{ + __m128i delta = _mm_set1_epi32(1 << (n-1)); + __m128i a1 = _mm_srai_epi32(_mm_add_epi32(a.val, delta), n); + _mm_storel_epi64((__m128i*)ptr, _mm_packs_epi32(a1, a1)); +} + + +// [a0 0 | b0 0] [a1 0 | b1 0] +inline v_uint32x4 v_pack(const v_uint64x2& a, const v_uint64x2& b) +{ + __m128i v0 = _mm_unpacklo_epi32(a.val, b.val); // a0 a1 0 0 + __m128i v1 = _mm_unpackhi_epi32(a.val, b.val); // b0 b1 0 0 + return v_uint32x4(_mm_unpacklo_epi32(v0, v1)); +} + +inline void v_pack_store(unsigned* ptr, const v_uint64x2& a) +{ + __m128i a1 = _mm_shuffle_epi32(a.val, _MM_SHUFFLE(0, 2, 2, 0)); + _mm_storel_epi64((__m128i*)ptr, a1); +} + +// [a0 0 | b0 0] [a1 0 | b1 0] +inline v_int32x4 v_pack(const v_int64x2& a, const v_int64x2& b) +{ + __m128i v0 = _mm_unpacklo_epi32(a.val, b.val); // a0 a1 0 0 + __m128i v1 = _mm_unpackhi_epi32(a.val, b.val); // b0 b1 0 0 + return v_int32x4(_mm_unpacklo_epi32(v0, v1)); +} + +inline void v_pack_store(int* ptr, const v_int64x2& a) +{ + __m128i a1 = _mm_shuffle_epi32(a.val, _MM_SHUFFLE(0, 2, 2, 0)); + _mm_storel_epi64((__m128i*)ptr, a1); +} + +template inline +v_uint32x4 v_rshr_pack(const v_uint64x2& a, const v_uint64x2& b) +{ + uint64 delta = (uint64)1 << (n-1); + v_uint64x2 delta2(delta, delta); + __m128i a1 = _mm_srli_epi64(_mm_add_epi64(a.val, delta2.val), n); + __m128i b1 = _mm_srli_epi64(_mm_add_epi64(b.val, delta2.val), n); + __m128i v0 = _mm_unpacklo_epi32(a1, b1); // a0 a1 0 0 + __m128i v1 = _mm_unpackhi_epi32(a1, b1); // b0 b1 0 0 + return v_uint32x4(_mm_unpacklo_epi32(v0, v1)); +} + +template inline +void v_rshr_pack_store(unsigned* ptr, const v_uint64x2& a) +{ + uint64 delta = (uint64)1 << (n-1); + v_uint64x2 delta2(delta, delta); + __m128i a1 = _mm_srli_epi64(_mm_add_epi64(a.val, delta2.val), n); + __m128i a2 = _mm_shuffle_epi32(a1, _MM_SHUFFLE(0, 2, 2, 0)); + _mm_storel_epi64((__m128i*)ptr, a2); +} + +inline __m128i v_sign_epi64(__m128i a) +{ + return _mm_shuffle_epi32(_mm_srai_epi32(a, 31), _MM_SHUFFLE(3, 3, 1, 1)); // x m0 | x m1 +} + +inline __m128i v_srai_epi64(__m128i a, int imm) +{ + __m128i smask = v_sign_epi64(a); + return _mm_xor_si128(_mm_srli_epi64(_mm_xor_si128(a, smask), imm), smask); +} + +template inline +v_int32x4 v_rshr_pack(const v_int64x2& a, const v_int64x2& b) +{ + int64 delta = (int64)1 << (n-1); + v_int64x2 delta2(delta, delta); + __m128i a1 = v_srai_epi64(_mm_add_epi64(a.val, delta2.val), n); + __m128i b1 = v_srai_epi64(_mm_add_epi64(b.val, delta2.val), n); + __m128i v0 = _mm_unpacklo_epi32(a1, b1); // a0 a1 0 0 + __m128i v1 = _mm_unpackhi_epi32(a1, b1); // b0 b1 0 0 + return v_int32x4(_mm_unpacklo_epi32(v0, v1)); +} + +template inline +void v_rshr_pack_store(int* ptr, const v_int64x2& a) +{ + int64 delta = (int64)1 << (n-1); + v_int64x2 delta2(delta, delta); + __m128i a1 = v_srai_epi64(_mm_add_epi64(a.val, delta2.val), n); + __m128i a2 = _mm_shuffle_epi32(a1, _MM_SHUFFLE(0, 2, 2, 0)); + _mm_storel_epi64((__m128i*)ptr, a2); +} + +// pack boolean +inline v_uint8x16 v_pack_b(const v_uint16x8& a, const v_uint16x8& b) +{ + __m128i ab = _mm_packs_epi16(a.val, b.val); + return v_uint8x16(ab); +} + +inline v_uint8x16 v_pack_b(const v_uint32x4& a, const v_uint32x4& b, + const v_uint32x4& c, const v_uint32x4& d) +{ + __m128i ab = _mm_packs_epi32(a.val, b.val); + __m128i cd = _mm_packs_epi32(c.val, d.val); + return v_uint8x16(_mm_packs_epi16(ab, cd)); +} + +inline v_uint8x16 v_pack_b(const v_uint64x2& a, const v_uint64x2& b, const v_uint64x2& c, + const v_uint64x2& d, const v_uint64x2& e, const v_uint64x2& f, + const v_uint64x2& g, const v_uint64x2& h) +{ + __m128i ab = _mm_packs_epi32(a.val, b.val); + __m128i cd = _mm_packs_epi32(c.val, d.val); + __m128i ef = _mm_packs_epi32(e.val, f.val); + __m128i gh = _mm_packs_epi32(g.val, h.val); + + __m128i abcd = _mm_packs_epi32(ab, cd); + __m128i efgh = _mm_packs_epi32(ef, gh); + return v_uint8x16(_mm_packs_epi16(abcd, efgh)); +} + +inline v_float32x4 v_matmul(const v_float32x4& v, const v_float32x4& m0, + const v_float32x4& m1, const v_float32x4& m2, + const v_float32x4& m3) +{ + __m128 v0 = _mm_mul_ps(_mm_shuffle_ps(v.val, v.val, _MM_SHUFFLE(0, 0, 0, 0)), m0.val); + __m128 v1 = _mm_mul_ps(_mm_shuffle_ps(v.val, v.val, _MM_SHUFFLE(1, 1, 1, 1)), m1.val); + __m128 v2 = _mm_mul_ps(_mm_shuffle_ps(v.val, v.val, _MM_SHUFFLE(2, 2, 2, 2)), m2.val); + __m128 v3 = _mm_mul_ps(_mm_shuffle_ps(v.val, v.val, _MM_SHUFFLE(3, 3, 3, 3)), m3.val); + + return v_float32x4(_mm_add_ps(_mm_add_ps(v0, v1), _mm_add_ps(v2, v3))); +} + +inline v_float32x4 v_matmuladd(const v_float32x4& v, const v_float32x4& m0, + const v_float32x4& m1, const v_float32x4& m2, + const v_float32x4& a) +{ + __m128 v0 = _mm_mul_ps(_mm_shuffle_ps(v.val, v.val, _MM_SHUFFLE(0, 0, 0, 0)), m0.val); + __m128 v1 = _mm_mul_ps(_mm_shuffle_ps(v.val, v.val, _MM_SHUFFLE(1, 1, 1, 1)), m1.val); + __m128 v2 = _mm_mul_ps(_mm_shuffle_ps(v.val, v.val, _MM_SHUFFLE(2, 2, 2, 2)), m2.val); + + return v_float32x4(_mm_add_ps(_mm_add_ps(v0, v1), _mm_add_ps(v2, a.val))); +} + +#define OPENCV_HAL_IMPL_SSE_BIN_OP(bin_op, _Tpvec, intrin) \ + inline _Tpvec operator bin_op (const _Tpvec& a, const _Tpvec& b) \ + { \ + return _Tpvec(intrin(a.val, b.val)); \ + } \ + inline _Tpvec& operator bin_op##= (_Tpvec& a, const _Tpvec& b) \ + { \ + a.val = intrin(a.val, b.val); \ + return a; \ + } + +OPENCV_HAL_IMPL_SSE_BIN_OP(+, v_uint8x16, _mm_adds_epu8) +OPENCV_HAL_IMPL_SSE_BIN_OP(-, v_uint8x16, _mm_subs_epu8) +OPENCV_HAL_IMPL_SSE_BIN_OP(+, v_int8x16, _mm_adds_epi8) +OPENCV_HAL_IMPL_SSE_BIN_OP(-, v_int8x16, _mm_subs_epi8) +OPENCV_HAL_IMPL_SSE_BIN_OP(+, v_uint16x8, _mm_adds_epu16) +OPENCV_HAL_IMPL_SSE_BIN_OP(-, v_uint16x8, _mm_subs_epu16) +OPENCV_HAL_IMPL_SSE_BIN_OP(+, v_int16x8, _mm_adds_epi16) +OPENCV_HAL_IMPL_SSE_BIN_OP(-, v_int16x8, _mm_subs_epi16) +OPENCV_HAL_IMPL_SSE_BIN_OP(+, v_uint32x4, _mm_add_epi32) +OPENCV_HAL_IMPL_SSE_BIN_OP(-, v_uint32x4, _mm_sub_epi32) +OPENCV_HAL_IMPL_SSE_BIN_OP(*, v_uint32x4, _v128_mullo_epi32) +OPENCV_HAL_IMPL_SSE_BIN_OP(+, v_int32x4, _mm_add_epi32) +OPENCV_HAL_IMPL_SSE_BIN_OP(-, v_int32x4, _mm_sub_epi32) +OPENCV_HAL_IMPL_SSE_BIN_OP(*, v_int32x4, _v128_mullo_epi32) +OPENCV_HAL_IMPL_SSE_BIN_OP(+, v_float32x4, _mm_add_ps) +OPENCV_HAL_IMPL_SSE_BIN_OP(-, v_float32x4, _mm_sub_ps) +OPENCV_HAL_IMPL_SSE_BIN_OP(*, v_float32x4, _mm_mul_ps) +OPENCV_HAL_IMPL_SSE_BIN_OP(/, v_float32x4, _mm_div_ps) +OPENCV_HAL_IMPL_SSE_BIN_OP(+, v_float64x2, _mm_add_pd) +OPENCV_HAL_IMPL_SSE_BIN_OP(-, v_float64x2, _mm_sub_pd) +OPENCV_HAL_IMPL_SSE_BIN_OP(*, v_float64x2, _mm_mul_pd) +OPENCV_HAL_IMPL_SSE_BIN_OP(/, v_float64x2, _mm_div_pd) +OPENCV_HAL_IMPL_SSE_BIN_OP(+, v_uint64x2, _mm_add_epi64) +OPENCV_HAL_IMPL_SSE_BIN_OP(-, v_uint64x2, _mm_sub_epi64) +OPENCV_HAL_IMPL_SSE_BIN_OP(+, v_int64x2, _mm_add_epi64) +OPENCV_HAL_IMPL_SSE_BIN_OP(-, v_int64x2, _mm_sub_epi64) + +// saturating multiply 8-bit, 16-bit +#define OPENCV_HAL_IMPL_SSE_MUL_SAT(_Tpvec, _Tpwvec) \ + inline _Tpvec operator * (const _Tpvec& a, const _Tpvec& b) \ + { \ + _Tpwvec c, d; \ + v_mul_expand(a, b, c, d); \ + return v_pack(c, d); \ + } \ + inline _Tpvec& operator *= (_Tpvec& a, const _Tpvec& b) \ + { a = a * b; return a; } + +OPENCV_HAL_IMPL_SSE_MUL_SAT(v_uint8x16, v_uint16x8) +OPENCV_HAL_IMPL_SSE_MUL_SAT(v_int8x16, v_int16x8) +OPENCV_HAL_IMPL_SSE_MUL_SAT(v_uint16x8, v_uint32x4) +OPENCV_HAL_IMPL_SSE_MUL_SAT(v_int16x8, v_int32x4) + +// Multiply and expand +inline void v_mul_expand(const v_uint8x16& a, const v_uint8x16& b, + v_uint16x8& c, v_uint16x8& d) +{ + v_uint16x8 a0, a1, b0, b1; + v_expand(a, a0, a1); + v_expand(b, b0, b1); + c = v_mul_wrap(a0, b0); + d = v_mul_wrap(a1, b1); +} + +inline void v_mul_expand(const v_int8x16& a, const v_int8x16& b, + v_int16x8& c, v_int16x8& d) +{ + v_int16x8 a0, a1, b0, b1; + v_expand(a, a0, a1); + v_expand(b, b0, b1); + c = v_mul_wrap(a0, b0); + d = v_mul_wrap(a1, b1); +} + +inline void v_mul_expand(const v_int16x8& a, const v_int16x8& b, + v_int32x4& c, v_int32x4& d) +{ + __m128i v0 = _mm_mullo_epi16(a.val, b.val); + __m128i v1 = _mm_mulhi_epi16(a.val, b.val); + c.val = _mm_unpacklo_epi16(v0, v1); + d.val = _mm_unpackhi_epi16(v0, v1); +} + +inline void v_mul_expand(const v_uint16x8& a, const v_uint16x8& b, + v_uint32x4& c, v_uint32x4& d) +{ + __m128i v0 = _mm_mullo_epi16(a.val, b.val); + __m128i v1 = _mm_mulhi_epu16(a.val, b.val); + c.val = _mm_unpacklo_epi16(v0, v1); + d.val = _mm_unpackhi_epi16(v0, v1); +} + +inline void v_mul_expand(const v_uint32x4& a, const v_uint32x4& b, + v_uint64x2& c, v_uint64x2& d) +{ + __m128i c0 = _mm_mul_epu32(a.val, b.val); + __m128i c1 = _mm_mul_epu32(_mm_srli_epi64(a.val, 32), _mm_srli_epi64(b.val, 32)); + c.val = _mm_unpacklo_epi64(c0, c1); + d.val = _mm_unpackhi_epi64(c0, c1); +} + +inline v_int16x8 v_mul_hi(const v_int16x8& a, const v_int16x8& b) { return v_int16x8(_mm_mulhi_epi16(a.val, b.val)); } +inline v_uint16x8 v_mul_hi(const v_uint16x8& a, const v_uint16x8& b) { return v_uint16x8(_mm_mulhi_epu16(a.val, b.val)); } + +//////// Dot Product //////// + +// 16 >> 32 +inline v_int32x4 v_dotprod(const v_int16x8& a, const v_int16x8& b) +{ return v_int32x4(_mm_madd_epi16(a.val, b.val)); } +inline v_int32x4 v_dotprod(const v_int16x8& a, const v_int16x8& b, const v_int32x4& c) +{ return v_dotprod(a, b) + c; } + +// 32 >> 64 +inline v_int64x2 v_dotprod(const v_int32x4& a, const v_int32x4& b) +{ +#if CV_SSE4_1 + __m128i even = _mm_mul_epi32(a.val, b.val); + __m128i odd = _mm_mul_epi32(_mm_srli_epi64(a.val, 32), _mm_srli_epi64(b.val, 32)); + return v_int64x2(_mm_add_epi64(even, odd)); +#else + __m128i even_u = _mm_mul_epu32(a.val, b.val); + __m128i odd_u = _mm_mul_epu32(_mm_srli_epi64(a.val, 32), _mm_srli_epi64(b.val, 32)); + // convert unsigned to signed high multiplication (from: Agner Fog(veclib) and H S Warren: Hacker's delight, 2003, p. 132) + __m128i a_sign = _mm_srai_epi32(a.val, 31); + __m128i b_sign = _mm_srai_epi32(b.val, 31); + // |x * sign of x + __m128i axb = _mm_and_si128(a.val, b_sign); + __m128i bxa = _mm_and_si128(b.val, a_sign); + // sum of sign corrections + __m128i ssum = _mm_add_epi32(bxa, axb); + __m128i even_ssum = _mm_slli_epi64(ssum, 32); + __m128i odd_ssum = _mm_and_si128(ssum, _mm_set_epi32(-1, 0, -1, 0)); + // convert to signed and prod + return v_int64x2(_mm_add_epi64(_mm_sub_epi64(even_u, even_ssum), _mm_sub_epi64(odd_u, odd_ssum))); +#endif +} +inline v_int64x2 v_dotprod(const v_int32x4& a, const v_int32x4& b, const v_int64x2& c) +{ return v_dotprod(a, b) + c; } + +// 8 >> 32 +inline v_uint32x4 v_dotprod_expand(const v_uint8x16& a, const v_uint8x16& b) +{ + __m128i a0 = _mm_srli_epi16(_mm_slli_si128(a.val, 1), 8); // even + __m128i a1 = _mm_srli_epi16(a.val, 8); // odd + __m128i b0 = _mm_srli_epi16(_mm_slli_si128(b.val, 1), 8); + __m128i b1 = _mm_srli_epi16(b.val, 8); + __m128i p0 = _mm_madd_epi16(a0, b0); + __m128i p1 = _mm_madd_epi16(a1, b1); + return v_uint32x4(_mm_add_epi32(p0, p1)); +} +inline v_uint32x4 v_dotprod_expand(const v_uint8x16& a, const v_uint8x16& b, const v_uint32x4& c) +{ return v_dotprod_expand(a, b) + c; } + +inline v_int32x4 v_dotprod_expand(const v_int8x16& a, const v_int8x16& b) +{ + __m128i a0 = _mm_srai_epi16(_mm_slli_si128(a.val, 1), 8); // even + __m128i a1 = _mm_srai_epi16(a.val, 8); // odd + __m128i b0 = _mm_srai_epi16(_mm_slli_si128(b.val, 1), 8); + __m128i b1 = _mm_srai_epi16(b.val, 8); + __m128i p0 = _mm_madd_epi16(a0, b0); + __m128i p1 = _mm_madd_epi16(a1, b1); + return v_int32x4(_mm_add_epi32(p0, p1)); +} +inline v_int32x4 v_dotprod_expand(const v_int8x16& a, const v_int8x16& b, const v_int32x4& c) +{ return v_dotprod_expand(a, b) + c; } + +// 16 >> 64 +inline v_uint64x2 v_dotprod_expand(const v_uint16x8& a, const v_uint16x8& b) +{ + v_uint32x4 c, d; + v_mul_expand(a, b, c, d); + + v_uint64x2 c0, c1, d0, d1; + v_expand(c, c0, c1); + v_expand(d, d0, d1); + + c0 += c1; d0 += d1; + return v_uint64x2(_mm_add_epi64( + _mm_unpacklo_epi64(c0.val, d0.val), + _mm_unpackhi_epi64(c0.val, d0.val) + )); +} +inline v_uint64x2 v_dotprod_expand(const v_uint16x8& a, const v_uint16x8& b, const v_uint64x2& c) +{ return v_dotprod_expand(a, b) + c; } + +inline v_int64x2 v_dotprod_expand(const v_int16x8& a, const v_int16x8& b) +{ + v_int32x4 prod = v_dotprod(a, b); + v_int64x2 c, d; + v_expand(prod, c, d); + return v_int64x2(_mm_add_epi64( + _mm_unpacklo_epi64(c.val, d.val), + _mm_unpackhi_epi64(c.val, d.val) + )); +} +inline v_int64x2 v_dotprod_expand(const v_int16x8& a, const v_int16x8& b, const v_int64x2& c) +{ return v_dotprod_expand(a, b) + c; } + +// 32 >> 64f +inline v_float64x2 v_dotprod_expand(const v_int32x4& a, const v_int32x4& b) +{ +#if CV_SSE4_1 + return v_cvt_f64(v_dotprod(a, b)); +#else + v_float64x2 c = v_cvt_f64(a) * v_cvt_f64(b); + v_float64x2 d = v_cvt_f64_high(a) * v_cvt_f64_high(b); + + return v_float64x2(_mm_add_pd( + _mm_unpacklo_pd(c.val, d.val), + _mm_unpackhi_pd(c.val, d.val) + )); +#endif +} +inline v_float64x2 v_dotprod_expand(const v_int32x4& a, const v_int32x4& b, const v_float64x2& c) +{ return v_dotprod_expand(a, b) + c; } + +//////// Fast Dot Product //////// + +// 16 >> 32 +inline v_int32x4 v_dotprod_fast(const v_int16x8& a, const v_int16x8& b) +{ return v_dotprod(a, b); } +inline v_int32x4 v_dotprod_fast(const v_int16x8& a, const v_int16x8& b, const v_int32x4& c) +{ return v_dotprod(a, b) + c; } + +// 32 >> 64 +inline v_int64x2 v_dotprod_fast(const v_int32x4& a, const v_int32x4& b) +{ return v_dotprod(a, b); } +inline v_int64x2 v_dotprod_fast(const v_int32x4& a, const v_int32x4& b, const v_int64x2& c) +{ return v_dotprod_fast(a, b) + c; } + +// 8 >> 32 +inline v_uint32x4 v_dotprod_expand_fast(const v_uint8x16& a, const v_uint8x16& b) +{ + __m128i a0 = v_expand_low(a).val; + __m128i a1 = v_expand_high(a).val; + __m128i b0 = v_expand_low(b).val; + __m128i b1 = v_expand_high(b).val; + __m128i p0 = _mm_madd_epi16(a0, b0); + __m128i p1 = _mm_madd_epi16(a1, b1); + return v_uint32x4(_mm_add_epi32(p0, p1)); +} +inline v_uint32x4 v_dotprod_expand_fast(const v_uint8x16& a, const v_uint8x16& b, const v_uint32x4& c) +{ return v_dotprod_expand_fast(a, b) + c; } + +inline v_int32x4 v_dotprod_expand_fast(const v_int8x16& a, const v_int8x16& b) +{ +#if CV_SSE4_1 + __m128i a0 = _mm_cvtepi8_epi16(a.val); + __m128i a1 = v_expand_high(a).val; + __m128i b0 = _mm_cvtepi8_epi16(b.val); + __m128i b1 = v_expand_high(b).val; + __m128i p0 = _mm_madd_epi16(a0, b0); + __m128i p1 = _mm_madd_epi16(a1, b1); + return v_int32x4(_mm_add_epi32(p0, p1)); +#else + return v_dotprod_expand(a, b); +#endif +} +inline v_int32x4 v_dotprod_expand_fast(const v_int8x16& a, const v_int8x16& b, const v_int32x4& c) +{ return v_dotprod_expand_fast(a, b) + c; } + +// 16 >> 64 +inline v_uint64x2 v_dotprod_expand_fast(const v_uint16x8& a, const v_uint16x8& b) +{ + v_uint32x4 c, d; + v_mul_expand(a, b, c, d); + + v_uint64x2 c0, c1, d0, d1; + v_expand(c, c0, c1); + v_expand(d, d0, d1); + + c0 += c1; d0 += d1; + return c0 + d0; +} +inline v_uint64x2 v_dotprod_expand_fast(const v_uint16x8& a, const v_uint16x8& b, const v_uint64x2& c) +{ return v_dotprod_expand_fast(a, b) + c; } + +inline v_int64x2 v_dotprod_expand_fast(const v_int16x8& a, const v_int16x8& b) +{ + v_int32x4 prod = v_dotprod(a, b); + v_int64x2 c, d; + v_expand(prod, c, d); + return c + d; +} +inline v_int64x2 v_dotprod_expand_fast(const v_int16x8& a, const v_int16x8& b, const v_int64x2& c) +{ return v_dotprod_expand_fast(a, b) + c; } + +// 32 >> 64f +v_float64x2 v_fma(const v_float64x2& a, const v_float64x2& b, const v_float64x2& c); +inline v_float64x2 v_dotprod_expand_fast(const v_int32x4& a, const v_int32x4& b) +{ return v_fma(v_cvt_f64(a), v_cvt_f64(b), v_cvt_f64_high(a) * v_cvt_f64_high(b)); } +inline v_float64x2 v_dotprod_expand_fast(const v_int32x4& a, const v_int32x4& b, const v_float64x2& c) +{ return v_fma(v_cvt_f64(a), v_cvt_f64(b), v_fma(v_cvt_f64_high(a), v_cvt_f64_high(b), c)); } + +#define OPENCV_HAL_IMPL_SSE_LOGIC_OP(_Tpvec, suffix, not_const) \ + OPENCV_HAL_IMPL_SSE_BIN_OP(&, _Tpvec, _mm_and_##suffix) \ + OPENCV_HAL_IMPL_SSE_BIN_OP(|, _Tpvec, _mm_or_##suffix) \ + OPENCV_HAL_IMPL_SSE_BIN_OP(^, _Tpvec, _mm_xor_##suffix) \ + inline _Tpvec operator ~ (const _Tpvec& a) \ + { \ + return _Tpvec(_mm_xor_##suffix(a.val, not_const)); \ + } + +OPENCV_HAL_IMPL_SSE_LOGIC_OP(v_uint8x16, si128, _mm_set1_epi32(-1)) +OPENCV_HAL_IMPL_SSE_LOGIC_OP(v_int8x16, si128, _mm_set1_epi32(-1)) +OPENCV_HAL_IMPL_SSE_LOGIC_OP(v_uint16x8, si128, _mm_set1_epi32(-1)) +OPENCV_HAL_IMPL_SSE_LOGIC_OP(v_int16x8, si128, _mm_set1_epi32(-1)) +OPENCV_HAL_IMPL_SSE_LOGIC_OP(v_uint32x4, si128, _mm_set1_epi32(-1)) +OPENCV_HAL_IMPL_SSE_LOGIC_OP(v_int32x4, si128, _mm_set1_epi32(-1)) +OPENCV_HAL_IMPL_SSE_LOGIC_OP(v_uint64x2, si128, _mm_set1_epi32(-1)) +OPENCV_HAL_IMPL_SSE_LOGIC_OP(v_int64x2, si128, _mm_set1_epi32(-1)) +OPENCV_HAL_IMPL_SSE_LOGIC_OP(v_float32x4, ps, _mm_castsi128_ps(_mm_set1_epi32(-1))) +OPENCV_HAL_IMPL_SSE_LOGIC_OP(v_float64x2, pd, _mm_castsi128_pd(_mm_set1_epi32(-1))) + +inline v_float32x4 v_sqrt(const v_float32x4& x) +{ return v_float32x4(_mm_sqrt_ps(x.val)); } + +inline v_float32x4 v_invsqrt(const v_float32x4& x) +{ + const __m128 _0_5 = _mm_set1_ps(0.5f), _1_5 = _mm_set1_ps(1.5f); + __m128 t = x.val; + __m128 h = _mm_mul_ps(t, _0_5); + t = _mm_rsqrt_ps(t); + t = _mm_mul_ps(t, _mm_sub_ps(_1_5, _mm_mul_ps(_mm_mul_ps(t, t), h))); + return v_float32x4(t); +} + +inline v_float64x2 v_sqrt(const v_float64x2& x) +{ return v_float64x2(_mm_sqrt_pd(x.val)); } + +inline v_float64x2 v_invsqrt(const v_float64x2& x) +{ + const __m128d v_1 = _mm_set1_pd(1.); + return v_float64x2(_mm_div_pd(v_1, _mm_sqrt_pd(x.val))); +} + +#define OPENCV_HAL_IMPL_SSE_ABS_INT_FUNC(_Tpuvec, _Tpsvec, func, suffix, subWidth) \ +inline _Tpuvec v_abs(const _Tpsvec& x) \ +{ return _Tpuvec(_mm_##func##_ep##suffix(x.val, _mm_sub_ep##subWidth(_mm_setzero_si128(), x.val))); } + +OPENCV_HAL_IMPL_SSE_ABS_INT_FUNC(v_uint8x16, v_int8x16, min, u8, i8) +OPENCV_HAL_IMPL_SSE_ABS_INT_FUNC(v_uint16x8, v_int16x8, max, i16, i16) +inline v_uint32x4 v_abs(const v_int32x4& x) +{ + __m128i s = _mm_srli_epi32(x.val, 31); + __m128i f = _mm_srai_epi32(x.val, 31); + return v_uint32x4(_mm_add_epi32(_mm_xor_si128(x.val, f), s)); +} +inline v_float32x4 v_abs(const v_float32x4& x) +{ return v_float32x4(_mm_and_ps(x.val, _mm_castsi128_ps(_mm_set1_epi32(0x7fffffff)))); } +inline v_float64x2 v_abs(const v_float64x2& x) +{ + return v_float64x2(_mm_and_pd(x.val, + _mm_castsi128_pd(_mm_srli_epi64(_mm_set1_epi32(-1), 1)))); +} + +// TODO: exp, log, sin, cos + +#define OPENCV_HAL_IMPL_SSE_BIN_FUNC(_Tpvec, func, intrin) \ +inline _Tpvec func(const _Tpvec& a, const _Tpvec& b) \ +{ \ + return _Tpvec(intrin(a.val, b.val)); \ +} + +OPENCV_HAL_IMPL_SSE_BIN_FUNC(v_uint8x16, v_min, _mm_min_epu8) +OPENCV_HAL_IMPL_SSE_BIN_FUNC(v_uint8x16, v_max, _mm_max_epu8) +OPENCV_HAL_IMPL_SSE_BIN_FUNC(v_int16x8, v_min, _mm_min_epi16) +OPENCV_HAL_IMPL_SSE_BIN_FUNC(v_int16x8, v_max, _mm_max_epi16) +OPENCV_HAL_IMPL_SSE_BIN_FUNC(v_float32x4, v_min, _mm_min_ps) +OPENCV_HAL_IMPL_SSE_BIN_FUNC(v_float32x4, v_max, _mm_max_ps) +OPENCV_HAL_IMPL_SSE_BIN_FUNC(v_float64x2, v_min, _mm_min_pd) +OPENCV_HAL_IMPL_SSE_BIN_FUNC(v_float64x2, v_max, _mm_max_pd) + +inline v_int8x16 v_min(const v_int8x16& a, const v_int8x16& b) +{ +#if CV_SSE4_1 + return v_int8x16(_mm_min_epi8(a.val, b.val)); +#else + __m128i delta = _mm_set1_epi8((char)-128); + return v_int8x16(_mm_xor_si128(delta, _mm_min_epu8(_mm_xor_si128(a.val, delta), + _mm_xor_si128(b.val, delta)))); +#endif +} +inline v_int8x16 v_max(const v_int8x16& a, const v_int8x16& b) +{ +#if CV_SSE4_1 + return v_int8x16(_mm_max_epi8(a.val, b.val)); +#else + __m128i delta = _mm_set1_epi8((char)-128); + return v_int8x16(_mm_xor_si128(delta, _mm_max_epu8(_mm_xor_si128(a.val, delta), + _mm_xor_si128(b.val, delta)))); +#endif +} +inline v_uint16x8 v_min(const v_uint16x8& a, const v_uint16x8& b) +{ +#if CV_SSE4_1 + return v_uint16x8(_mm_min_epu16(a.val, b.val)); +#else + return v_uint16x8(_mm_subs_epu16(a.val, _mm_subs_epu16(a.val, b.val))); +#endif +} +inline v_uint16x8 v_max(const v_uint16x8& a, const v_uint16x8& b) +{ +#if CV_SSE4_1 + return v_uint16x8(_mm_max_epu16(a.val, b.val)); +#else + return v_uint16x8(_mm_adds_epu16(_mm_subs_epu16(a.val, b.val), b.val)); +#endif +} +inline v_uint32x4 v_min(const v_uint32x4& a, const v_uint32x4& b) +{ +#if CV_SSE4_1 + return v_uint32x4(_mm_min_epu32(a.val, b.val)); +#else + __m128i delta = _mm_set1_epi32((int)0x80000000); + __m128i mask = _mm_cmpgt_epi32(_mm_xor_si128(a.val, delta), _mm_xor_si128(b.val, delta)); + return v_uint32x4(v_select_si128(mask, b.val, a.val)); +#endif +} +inline v_uint32x4 v_max(const v_uint32x4& a, const v_uint32x4& b) +{ +#if CV_SSE4_1 + return v_uint32x4(_mm_max_epu32(a.val, b.val)); +#else + __m128i delta = _mm_set1_epi32((int)0x80000000); + __m128i mask = _mm_cmpgt_epi32(_mm_xor_si128(a.val, delta), _mm_xor_si128(b.val, delta)); + return v_uint32x4(v_select_si128(mask, a.val, b.val)); +#endif +} +inline v_int32x4 v_min(const v_int32x4& a, const v_int32x4& b) +{ +#if CV_SSE4_1 + return v_int32x4(_mm_min_epi32(a.val, b.val)); +#else + return v_int32x4(v_select_si128(_mm_cmpgt_epi32(a.val, b.val), b.val, a.val)); +#endif +} +inline v_int32x4 v_max(const v_int32x4& a, const v_int32x4& b) +{ +#if CV_SSE4_1 + return v_int32x4(_mm_max_epi32(a.val, b.val)); +#else + return v_int32x4(v_select_si128(_mm_cmpgt_epi32(a.val, b.val), a.val, b.val)); +#endif +} + +#define OPENCV_HAL_IMPL_SSE_INT_CMP_OP(_Tpuvec, _Tpsvec, suffix, sbit) \ +inline _Tpuvec operator == (const _Tpuvec& a, const _Tpuvec& b) \ +{ return _Tpuvec(_mm_cmpeq_##suffix(a.val, b.val)); } \ +inline _Tpuvec operator != (const _Tpuvec& a, const _Tpuvec& b) \ +{ \ + __m128i not_mask = _mm_set1_epi32(-1); \ + return _Tpuvec(_mm_xor_si128(_mm_cmpeq_##suffix(a.val, b.val), not_mask)); \ +} \ +inline _Tpsvec operator == (const _Tpsvec& a, const _Tpsvec& b) \ +{ return _Tpsvec(_mm_cmpeq_##suffix(a.val, b.val)); } \ +inline _Tpsvec operator != (const _Tpsvec& a, const _Tpsvec& b) \ +{ \ + __m128i not_mask = _mm_set1_epi32(-1); \ + return _Tpsvec(_mm_xor_si128(_mm_cmpeq_##suffix(a.val, b.val), not_mask)); \ +} \ +inline _Tpuvec operator < (const _Tpuvec& a, const _Tpuvec& b) \ +{ \ + __m128i smask = _mm_set1_##suffix(sbit); \ + return _Tpuvec(_mm_cmpgt_##suffix(_mm_xor_si128(b.val, smask), _mm_xor_si128(a.val, smask))); \ +} \ +inline _Tpuvec operator > (const _Tpuvec& a, const _Tpuvec& b) \ +{ \ + __m128i smask = _mm_set1_##suffix(sbit); \ + return _Tpuvec(_mm_cmpgt_##suffix(_mm_xor_si128(a.val, smask), _mm_xor_si128(b.val, smask))); \ +} \ +inline _Tpuvec operator <= (const _Tpuvec& a, const _Tpuvec& b) \ +{ \ + __m128i smask = _mm_set1_##suffix(sbit); \ + __m128i not_mask = _mm_set1_epi32(-1); \ + __m128i res = _mm_cmpgt_##suffix(_mm_xor_si128(a.val, smask), _mm_xor_si128(b.val, smask)); \ + return _Tpuvec(_mm_xor_si128(res, not_mask)); \ +} \ +inline _Tpuvec operator >= (const _Tpuvec& a, const _Tpuvec& b) \ +{ \ + __m128i smask = _mm_set1_##suffix(sbit); \ + __m128i not_mask = _mm_set1_epi32(-1); \ + __m128i res = _mm_cmpgt_##suffix(_mm_xor_si128(b.val, smask), _mm_xor_si128(a.val, smask)); \ + return _Tpuvec(_mm_xor_si128(res, not_mask)); \ +} \ +inline _Tpsvec operator < (const _Tpsvec& a, const _Tpsvec& b) \ +{ \ + return _Tpsvec(_mm_cmpgt_##suffix(b.val, a.val)); \ +} \ +inline _Tpsvec operator > (const _Tpsvec& a, const _Tpsvec& b) \ +{ \ + return _Tpsvec(_mm_cmpgt_##suffix(a.val, b.val)); \ +} \ +inline _Tpsvec operator <= (const _Tpsvec& a, const _Tpsvec& b) \ +{ \ + __m128i not_mask = _mm_set1_epi32(-1); \ + return _Tpsvec(_mm_xor_si128(_mm_cmpgt_##suffix(a.val, b.val), not_mask)); \ +} \ +inline _Tpsvec operator >= (const _Tpsvec& a, const _Tpsvec& b) \ +{ \ + __m128i not_mask = _mm_set1_epi32(-1); \ + return _Tpsvec(_mm_xor_si128(_mm_cmpgt_##suffix(b.val, a.val), not_mask)); \ +} + +OPENCV_HAL_IMPL_SSE_INT_CMP_OP(v_uint8x16, v_int8x16, epi8, (char)-128) +OPENCV_HAL_IMPL_SSE_INT_CMP_OP(v_uint16x8, v_int16x8, epi16, (short)-32768) +OPENCV_HAL_IMPL_SSE_INT_CMP_OP(v_uint32x4, v_int32x4, epi32, (int)0x80000000) + +#define OPENCV_HAL_IMPL_SSE_FLT_CMP_OP(_Tpvec, suffix) \ +inline _Tpvec operator == (const _Tpvec& a, const _Tpvec& b) \ +{ return _Tpvec(_mm_cmpeq_##suffix(a.val, b.val)); } \ +inline _Tpvec operator != (const _Tpvec& a, const _Tpvec& b) \ +{ return _Tpvec(_mm_cmpneq_##suffix(a.val, b.val)); } \ +inline _Tpvec operator < (const _Tpvec& a, const _Tpvec& b) \ +{ return _Tpvec(_mm_cmplt_##suffix(a.val, b.val)); } \ +inline _Tpvec operator > (const _Tpvec& a, const _Tpvec& b) \ +{ return _Tpvec(_mm_cmpgt_##suffix(a.val, b.val)); } \ +inline _Tpvec operator <= (const _Tpvec& a, const _Tpvec& b) \ +{ return _Tpvec(_mm_cmple_##suffix(a.val, b.val)); } \ +inline _Tpvec operator >= (const _Tpvec& a, const _Tpvec& b) \ +{ return _Tpvec(_mm_cmpge_##suffix(a.val, b.val)); } + +OPENCV_HAL_IMPL_SSE_FLT_CMP_OP(v_float32x4, ps) +OPENCV_HAL_IMPL_SSE_FLT_CMP_OP(v_float64x2, pd) + +#if CV_SSE4_1 +#define OPENCV_HAL_IMPL_SSE_64BIT_CMP_OP(_Tpvec) \ +inline _Tpvec operator == (const _Tpvec& a, const _Tpvec& b) \ +{ return _Tpvec(_mm_cmpeq_epi64(a.val, b.val)); } \ +inline _Tpvec operator != (const _Tpvec& a, const _Tpvec& b) \ +{ return ~(a == b); } +#else +#define OPENCV_HAL_IMPL_SSE_64BIT_CMP_OP(_Tpvec) \ +inline _Tpvec operator == (const _Tpvec& a, const _Tpvec& b) \ +{ __m128i cmp = _mm_cmpeq_epi32(a.val, b.val); \ + return _Tpvec(_mm_and_si128(cmp, _mm_shuffle_epi32(cmp, _MM_SHUFFLE(2, 3, 0, 1)))); } \ +inline _Tpvec operator != (const _Tpvec& a, const _Tpvec& b) \ +{ return ~(a == b); } +#endif + +OPENCV_HAL_IMPL_SSE_64BIT_CMP_OP(v_uint64x2) +OPENCV_HAL_IMPL_SSE_64BIT_CMP_OP(v_int64x2) + +inline v_float32x4 v_not_nan(const v_float32x4& a) +{ return v_float32x4(_mm_cmpord_ps(a.val, a.val)); } +inline v_float64x2 v_not_nan(const v_float64x2& a) +{ return v_float64x2(_mm_cmpord_pd(a.val, a.val)); } + +OPENCV_HAL_IMPL_SSE_BIN_FUNC(v_uint8x16, v_add_wrap, _mm_add_epi8) +OPENCV_HAL_IMPL_SSE_BIN_FUNC(v_int8x16, v_add_wrap, _mm_add_epi8) +OPENCV_HAL_IMPL_SSE_BIN_FUNC(v_uint16x8, v_add_wrap, _mm_add_epi16) +OPENCV_HAL_IMPL_SSE_BIN_FUNC(v_int16x8, v_add_wrap, _mm_add_epi16) +OPENCV_HAL_IMPL_SSE_BIN_FUNC(v_uint8x16, v_sub_wrap, _mm_sub_epi8) +OPENCV_HAL_IMPL_SSE_BIN_FUNC(v_int8x16, v_sub_wrap, _mm_sub_epi8) +OPENCV_HAL_IMPL_SSE_BIN_FUNC(v_uint16x8, v_sub_wrap, _mm_sub_epi16) +OPENCV_HAL_IMPL_SSE_BIN_FUNC(v_int16x8, v_sub_wrap, _mm_sub_epi16) +OPENCV_HAL_IMPL_SSE_BIN_FUNC(v_uint16x8, v_mul_wrap, _mm_mullo_epi16) +OPENCV_HAL_IMPL_SSE_BIN_FUNC(v_int16x8, v_mul_wrap, _mm_mullo_epi16) + +inline v_uint8x16 v_mul_wrap(const v_uint8x16& a, const v_uint8x16& b) +{ + __m128i ad = _mm_srai_epi16(a.val, 8); + __m128i bd = _mm_srai_epi16(b.val, 8); + __m128i p0 = _mm_mullo_epi16(a.val, b.val); // even + __m128i p1 = _mm_slli_epi16(_mm_mullo_epi16(ad, bd), 8); // odd + const __m128i b01 = _mm_set1_epi32(0xFF00FF00); + return v_uint8x16(_v128_blendv_epi8(p0, p1, b01)); +} +inline v_int8x16 v_mul_wrap(const v_int8x16& a, const v_int8x16& b) +{ + return v_reinterpret_as_s8(v_mul_wrap(v_reinterpret_as_u8(a), v_reinterpret_as_u8(b))); +} + +/** Absolute difference **/ + +inline v_uint8x16 v_absdiff(const v_uint8x16& a, const v_uint8x16& b) +{ return v_add_wrap(a - b, b - a); } +inline v_uint16x8 v_absdiff(const v_uint16x8& a, const v_uint16x8& b) +{ return v_add_wrap(a - b, b - a); } +inline v_uint32x4 v_absdiff(const v_uint32x4& a, const v_uint32x4& b) +{ return v_max(a, b) - v_min(a, b); } + +inline v_uint8x16 v_absdiff(const v_int8x16& a, const v_int8x16& b) +{ + v_int8x16 d = v_sub_wrap(a, b); + v_int8x16 m = a < b; + return v_reinterpret_as_u8(v_sub_wrap(d ^ m, m)); +} +inline v_uint16x8 v_absdiff(const v_int16x8& a, const v_int16x8& b) +{ + return v_reinterpret_as_u16(v_sub_wrap(v_max(a, b), v_min(a, b))); +} +inline v_uint32x4 v_absdiff(const v_int32x4& a, const v_int32x4& b) +{ + v_int32x4 d = a - b; + v_int32x4 m = a < b; + return v_reinterpret_as_u32((d ^ m) - m); +} + +/** Saturating absolute difference **/ +inline v_int8x16 v_absdiffs(const v_int8x16& a, const v_int8x16& b) +{ + v_int8x16 d = a - b; + v_int8x16 m = a < b; + return (d ^ m) - m; + } +inline v_int16x8 v_absdiffs(const v_int16x8& a, const v_int16x8& b) +{ return v_max(a, b) - v_min(a, b); } + + +inline v_int32x4 v_fma(const v_int32x4& a, const v_int32x4& b, const v_int32x4& c) +{ + return a * b + c; +} + +inline v_int32x4 v_muladd(const v_int32x4& a, const v_int32x4& b, const v_int32x4& c) +{ + return v_fma(a, b, c); +} + +inline v_float32x4 v_fma(const v_float32x4& a, const v_float32x4& b, const v_float32x4& c) +{ +#if CV_FMA3 + return v_float32x4(_mm_fmadd_ps(a.val, b.val, c.val)); +#else + return v_float32x4(_mm_add_ps(_mm_mul_ps(a.val, b.val), c.val)); +#endif +} + +inline v_float64x2 v_fma(const v_float64x2& a, const v_float64x2& b, const v_float64x2& c) +{ +#if CV_FMA3 + return v_float64x2(_mm_fmadd_pd(a.val, b.val, c.val)); +#else + return v_float64x2(_mm_add_pd(_mm_mul_pd(a.val, b.val), c.val)); +#endif +} + +#define OPENCV_HAL_IMPL_SSE_MISC_FLT_OP(_Tpvec, _Tp, _Tpreg, suffix, absmask_vec) \ +inline _Tpvec v_absdiff(const _Tpvec& a, const _Tpvec& b) \ +{ \ + _Tpreg absmask = _mm_castsi128_##suffix(absmask_vec); \ + return _Tpvec(_mm_and_##suffix(_mm_sub_##suffix(a.val, b.val), absmask)); \ +} \ +inline _Tpvec v_magnitude(const _Tpvec& a, const _Tpvec& b) \ +{ \ + _Tpvec res = v_fma(a, a, b*b); \ + return _Tpvec(_mm_sqrt_##suffix(res.val)); \ +} \ +inline _Tpvec v_sqr_magnitude(const _Tpvec& a, const _Tpvec& b) \ +{ \ + return v_fma(a, a, b*b); \ +} \ +inline _Tpvec v_muladd(const _Tpvec& a, const _Tpvec& b, const _Tpvec& c) \ +{ \ + return v_fma(a, b, c); \ +} + +OPENCV_HAL_IMPL_SSE_MISC_FLT_OP(v_float32x4, float, __m128, ps, _mm_set1_epi32((int)0x7fffffff)) +OPENCV_HAL_IMPL_SSE_MISC_FLT_OP(v_float64x2, double, __m128d, pd, _mm_srli_epi64(_mm_set1_epi32(-1), 1)) + +#define OPENCV_HAL_IMPL_SSE_SHIFT_OP(_Tpuvec, _Tpsvec, suffix, srai) \ +inline _Tpuvec operator << (const _Tpuvec& a, int imm) \ +{ \ + return _Tpuvec(_mm_slli_##suffix(a.val, imm)); \ +} \ +inline _Tpsvec operator << (const _Tpsvec& a, int imm) \ +{ \ + return _Tpsvec(_mm_slli_##suffix(a.val, imm)); \ +} \ +inline _Tpuvec operator >> (const _Tpuvec& a, int imm) \ +{ \ + return _Tpuvec(_mm_srli_##suffix(a.val, imm)); \ +} \ +inline _Tpsvec operator >> (const _Tpsvec& a, int imm) \ +{ \ + return _Tpsvec(srai(a.val, imm)); \ +} \ +template \ +inline _Tpuvec v_shl(const _Tpuvec& a) \ +{ \ + return _Tpuvec(_mm_slli_##suffix(a.val, imm)); \ +} \ +template \ +inline _Tpsvec v_shl(const _Tpsvec& a) \ +{ \ + return _Tpsvec(_mm_slli_##suffix(a.val, imm)); \ +} \ +template \ +inline _Tpuvec v_shr(const _Tpuvec& a) \ +{ \ + return _Tpuvec(_mm_srli_##suffix(a.val, imm)); \ +} \ +template \ +inline _Tpsvec v_shr(const _Tpsvec& a) \ +{ \ + return _Tpsvec(srai(a.val, imm)); \ +} + +OPENCV_HAL_IMPL_SSE_SHIFT_OP(v_uint16x8, v_int16x8, epi16, _mm_srai_epi16) +OPENCV_HAL_IMPL_SSE_SHIFT_OP(v_uint32x4, v_int32x4, epi32, _mm_srai_epi32) +OPENCV_HAL_IMPL_SSE_SHIFT_OP(v_uint64x2, v_int64x2, epi64, v_srai_epi64) + +namespace hal_sse_internal +{ + template 16)), + bool is_first = (imm == 0), + bool is_half = (imm == 8), + bool is_second = (imm == 16), + bool is_other = (((imm > 0) && (imm < 8)) || ((imm > 8) && (imm < 16)))> + class v_sse_palignr_u8_class; + + template + class v_sse_palignr_u8_class; + + template + class v_sse_palignr_u8_class + { + public: + inline __m128i operator()(const __m128i& a, const __m128i&) const + { + return a; + } + }; + + template + class v_sse_palignr_u8_class + { + public: + inline __m128i operator()(const __m128i& a, const __m128i& b) const + { + return _mm_unpacklo_epi64(_mm_unpackhi_epi64(a, a), b); + } + }; + + template + class v_sse_palignr_u8_class + { + public: + inline __m128i operator()(const __m128i&, const __m128i& b) const + { + return b; + } + }; + + template + class v_sse_palignr_u8_class + { +#if CV_SSSE3 + public: + inline __m128i operator()(const __m128i& a, const __m128i& b) const + { + return _mm_alignr_epi8(b, a, imm); + } +#else + public: + inline __m128i operator()(const __m128i& a, const __m128i& b) const + { + enum { imm2 = (sizeof(__m128i) - imm) }; + return _mm_or_si128(_mm_srli_si128(a, imm), _mm_slli_si128(b, imm2)); + } +#endif + }; + + template + inline __m128i v_sse_palignr_u8(const __m128i& a, const __m128i& b) + { + CV_StaticAssert((imm >= 0) && (imm <= 16), "Invalid imm for v_sse_palignr_u8."); + return v_sse_palignr_u8_class()(a, b); + } +} + +template +inline _Tpvec v_rotate_right(const _Tpvec &a) +{ + using namespace hal_sse_internal; + enum { imm2 = (imm * sizeof(typename _Tpvec::lane_type)) }; + return _Tpvec(v_sse_reinterpret_as( + _mm_srli_si128( + v_sse_reinterpret_as<__m128i>(a.val), imm2))); +} + +template +inline _Tpvec v_rotate_left(const _Tpvec &a) +{ + using namespace hal_sse_internal; + enum { imm2 = (imm * sizeof(typename _Tpvec::lane_type)) }; + return _Tpvec(v_sse_reinterpret_as( + _mm_slli_si128( + v_sse_reinterpret_as<__m128i>(a.val), imm2))); +} + +template +inline _Tpvec v_rotate_right(const _Tpvec &a, const _Tpvec &b) +{ + using namespace hal_sse_internal; + enum { imm2 = (imm * sizeof(typename _Tpvec::lane_type)) }; + return _Tpvec(v_sse_reinterpret_as( + v_sse_palignr_u8( + v_sse_reinterpret_as<__m128i>(a.val), + v_sse_reinterpret_as<__m128i>(b.val)))); +} + +template +inline _Tpvec v_rotate_left(const _Tpvec &a, const _Tpvec &b) +{ + using namespace hal_sse_internal; + enum { imm2 = ((_Tpvec::nlanes - imm) * sizeof(typename _Tpvec::lane_type)) }; + return _Tpvec(v_sse_reinterpret_as( + v_sse_palignr_u8( + v_sse_reinterpret_as<__m128i>(b.val), + v_sse_reinterpret_as<__m128i>(a.val)))); +} + +#define OPENCV_HAL_IMPL_SSE_LOADSTORE_INT_OP(_Tpvec, _Tp) \ +inline _Tpvec v_load(const _Tp* ptr) \ +{ return _Tpvec(_mm_loadu_si128((const __m128i*)ptr)); } \ +inline _Tpvec v_load_aligned(const _Tp* ptr) \ +{ return _Tpvec(_mm_load_si128((const __m128i*)ptr)); } \ +inline _Tpvec v_load_low(const _Tp* ptr) \ +{ return _Tpvec(_mm_loadl_epi64((const __m128i*)ptr)); } \ +inline _Tpvec v_load_halves(const _Tp* ptr0, const _Tp* ptr1) \ +{ \ + return _Tpvec(_mm_unpacklo_epi64(_mm_loadl_epi64((const __m128i*)ptr0), \ + _mm_loadl_epi64((const __m128i*)ptr1))); \ +} \ +inline void v_store(_Tp* ptr, const _Tpvec& a) \ +{ _mm_storeu_si128((__m128i*)ptr, a.val); } \ +inline void v_store_aligned(_Tp* ptr, const _Tpvec& a) \ +{ _mm_store_si128((__m128i*)ptr, a.val); } \ +inline void v_store_aligned_nocache(_Tp* ptr, const _Tpvec& a) \ +{ _mm_stream_si128((__m128i*)ptr, a.val); } \ +inline void v_store(_Tp* ptr, const _Tpvec& a, hal::StoreMode mode) \ +{ \ + if( mode == hal::STORE_UNALIGNED ) \ + _mm_storeu_si128((__m128i*)ptr, a.val); \ + else if( mode == hal::STORE_ALIGNED_NOCACHE ) \ + _mm_stream_si128((__m128i*)ptr, a.val); \ + else \ + _mm_store_si128((__m128i*)ptr, a.val); \ +} \ +inline void v_store_low(_Tp* ptr, const _Tpvec& a) \ +{ _mm_storel_epi64((__m128i*)ptr, a.val); } \ +inline void v_store_high(_Tp* ptr, const _Tpvec& a) \ +{ _mm_storel_epi64((__m128i*)ptr, _mm_unpackhi_epi64(a.val, a.val)); } + +OPENCV_HAL_IMPL_SSE_LOADSTORE_INT_OP(v_uint8x16, uchar) +OPENCV_HAL_IMPL_SSE_LOADSTORE_INT_OP(v_int8x16, schar) +OPENCV_HAL_IMPL_SSE_LOADSTORE_INT_OP(v_uint16x8, ushort) +OPENCV_HAL_IMPL_SSE_LOADSTORE_INT_OP(v_int16x8, short) +OPENCV_HAL_IMPL_SSE_LOADSTORE_INT_OP(v_uint32x4, unsigned) +OPENCV_HAL_IMPL_SSE_LOADSTORE_INT_OP(v_int32x4, int) +OPENCV_HAL_IMPL_SSE_LOADSTORE_INT_OP(v_uint64x2, uint64) +OPENCV_HAL_IMPL_SSE_LOADSTORE_INT_OP(v_int64x2, int64) + +#define OPENCV_HAL_IMPL_SSE_LOADSTORE_FLT_OP(_Tpvec, _Tp, suffix) \ +inline _Tpvec v_load(const _Tp* ptr) \ +{ return _Tpvec(_mm_loadu_##suffix(ptr)); } \ +inline _Tpvec v_load_aligned(const _Tp* ptr) \ +{ return _Tpvec(_mm_load_##suffix(ptr)); } \ +inline _Tpvec v_load_low(const _Tp* ptr) \ +{ return _Tpvec(_mm_castsi128_##suffix(_mm_loadl_epi64((const __m128i*)ptr))); } \ +inline _Tpvec v_load_halves(const _Tp* ptr0, const _Tp* ptr1) \ +{ \ + return _Tpvec(_mm_castsi128_##suffix( \ + _mm_unpacklo_epi64(_mm_loadl_epi64((const __m128i*)ptr0), \ + _mm_loadl_epi64((const __m128i*)ptr1)))); \ +} \ +inline void v_store(_Tp* ptr, const _Tpvec& a) \ +{ _mm_storeu_##suffix(ptr, a.val); } \ +inline void v_store_aligned(_Tp* ptr, const _Tpvec& a) \ +{ _mm_store_##suffix(ptr, a.val); } \ +inline void v_store_aligned_nocache(_Tp* ptr, const _Tpvec& a) \ +{ _mm_stream_##suffix(ptr, a.val); } \ +inline void v_store(_Tp* ptr, const _Tpvec& a, hal::StoreMode mode) \ +{ \ + if( mode == hal::STORE_UNALIGNED ) \ + _mm_storeu_##suffix(ptr, a.val); \ + else if( mode == hal::STORE_ALIGNED_NOCACHE ) \ + _mm_stream_##suffix(ptr, a.val); \ + else \ + _mm_store_##suffix(ptr, a.val); \ +} \ +inline void v_store_low(_Tp* ptr, const _Tpvec& a) \ +{ _mm_storel_epi64((__m128i*)ptr, _mm_cast##suffix##_si128(a.val)); } \ +inline void v_store_high(_Tp* ptr, const _Tpvec& a) \ +{ \ + __m128i a1 = _mm_cast##suffix##_si128(a.val); \ + _mm_storel_epi64((__m128i*)ptr, _mm_unpackhi_epi64(a1, a1)); \ +} + +OPENCV_HAL_IMPL_SSE_LOADSTORE_FLT_OP(v_float32x4, float, ps) +OPENCV_HAL_IMPL_SSE_LOADSTORE_FLT_OP(v_float64x2, double, pd) + +inline unsigned v_reduce_sum(const v_uint8x16& a) +{ + __m128i half = _mm_sad_epu8(a.val, _mm_setzero_si128()); + return (unsigned)_mm_cvtsi128_si32(_mm_add_epi32(half, _mm_unpackhi_epi64(half, half))); +} +inline int v_reduce_sum(const v_int8x16& a) +{ + __m128i half = _mm_set1_epi8((schar)-128); + half = _mm_sad_epu8(_mm_xor_si128(a.val, half), _mm_setzero_si128()); + return _mm_cvtsi128_si32(_mm_add_epi32(half, _mm_unpackhi_epi64(half, half))) - 2048; +} +#define OPENCV_HAL_IMPL_SSE_REDUCE_OP_16(func) \ +inline schar v_reduce_##func(const v_int8x16& a) \ +{ \ + __m128i val = a.val; \ + __m128i smask = _mm_set1_epi8((schar)-128); \ + val = _mm_xor_si128(val, smask); \ + val = _mm_##func##_epu8(val, _mm_srli_si128(val,8)); \ + val = _mm_##func##_epu8(val, _mm_srli_si128(val,4)); \ + val = _mm_##func##_epu8(val, _mm_srli_si128(val,2)); \ + val = _mm_##func##_epu8(val, _mm_srli_si128(val,1)); \ + return (schar)_mm_cvtsi128_si32(val) ^ (schar)-128; \ +} \ +inline uchar v_reduce_##func(const v_uint8x16& a) \ +{ \ + __m128i val = a.val; \ + val = _mm_##func##_epu8(val, _mm_srli_si128(val,8)); \ + val = _mm_##func##_epu8(val, _mm_srli_si128(val,4)); \ + val = _mm_##func##_epu8(val, _mm_srli_si128(val,2)); \ + val = _mm_##func##_epu8(val, _mm_srli_si128(val,1)); \ + return (uchar)_mm_cvtsi128_si32(val); \ +} +OPENCV_HAL_IMPL_SSE_REDUCE_OP_16(max) +OPENCV_HAL_IMPL_SSE_REDUCE_OP_16(min) + +#define OPENCV_HAL_IMPL_SSE_REDUCE_OP_8(_Tpvec, scalartype, func, suffix, sbit) \ +inline scalartype v_reduce_##func(const v_##_Tpvec& a) \ +{ \ + __m128i val = a.val; \ + val = _mm_##func##_##suffix(val, _mm_srli_si128(val,8)); \ + val = _mm_##func##_##suffix(val, _mm_srli_si128(val,4)); \ + val = _mm_##func##_##suffix(val, _mm_srli_si128(val,2)); \ + return (scalartype)_mm_cvtsi128_si32(val); \ +} \ +inline unsigned scalartype v_reduce_##func(const v_u##_Tpvec& a) \ +{ \ + __m128i val = a.val; \ + __m128i smask = _mm_set1_epi16(sbit); \ + val = _mm_xor_si128(val, smask); \ + val = _mm_##func##_##suffix(val, _mm_srli_si128(val,8)); \ + val = _mm_##func##_##suffix(val, _mm_srli_si128(val,4)); \ + val = _mm_##func##_##suffix(val, _mm_srli_si128(val,2)); \ + return (unsigned scalartype)(_mm_cvtsi128_si32(val) ^ sbit); \ +} +OPENCV_HAL_IMPL_SSE_REDUCE_OP_8(int16x8, short, max, epi16, (short)-32768) +OPENCV_HAL_IMPL_SSE_REDUCE_OP_8(int16x8, short, min, epi16, (short)-32768) + +#define OPENCV_HAL_IMPL_SSE_REDUCE_OP_4_SUM(_Tpvec, scalartype, regtype, suffix, cast_from, cast_to, extract) \ +inline scalartype v_reduce_sum(const _Tpvec& a) \ +{ \ + regtype val = a.val; \ + val = _mm_add_##suffix(val, cast_to(_mm_srli_si128(cast_from(val), 8))); \ + val = _mm_add_##suffix(val, cast_to(_mm_srli_si128(cast_from(val), 4))); \ + return (scalartype)_mm_cvt##extract(val); \ +} + +#define OPENCV_HAL_IMPL_SSE_REDUCE_OP_4(_Tpvec, scalartype, func, scalar_func) \ +inline scalartype v_reduce_##func(const _Tpvec& a) \ +{ \ + scalartype CV_DECL_ALIGNED(16) buf[4]; \ + v_store_aligned(buf, a); \ + scalartype s0 = scalar_func(buf[0], buf[1]); \ + scalartype s1 = scalar_func(buf[2], buf[3]); \ + return scalar_func(s0, s1); \ +} + +OPENCV_HAL_IMPL_SSE_REDUCE_OP_4_SUM(v_uint32x4, unsigned, __m128i, epi32, OPENCV_HAL_NOP, OPENCV_HAL_NOP, si128_si32) +OPENCV_HAL_IMPL_SSE_REDUCE_OP_4_SUM(v_int32x4, int, __m128i, epi32, OPENCV_HAL_NOP, OPENCV_HAL_NOP, si128_si32) +OPENCV_HAL_IMPL_SSE_REDUCE_OP_4_SUM(v_float32x4, float, __m128, ps, _mm_castps_si128, _mm_castsi128_ps, ss_f32) + +inline int v_reduce_sum(const v_int16x8& a) +{ return v_reduce_sum(v_expand_low(a) + v_expand_high(a)); } +inline unsigned v_reduce_sum(const v_uint16x8& a) +{ return v_reduce_sum(v_expand_low(a) + v_expand_high(a)); } + +inline uint64 v_reduce_sum(const v_uint64x2& a) +{ + uint64 CV_DECL_ALIGNED(32) idx[2]; + v_store_aligned(idx, a); + return idx[0] + idx[1]; +} +inline int64 v_reduce_sum(const v_int64x2& a) +{ + int64 CV_DECL_ALIGNED(32) idx[2]; + v_store_aligned(idx, a); + return idx[0] + idx[1]; +} +inline double v_reduce_sum(const v_float64x2& a) +{ + double CV_DECL_ALIGNED(32) idx[2]; + v_store_aligned(idx, a); + return idx[0] + idx[1]; +} + +inline v_float32x4 v_reduce_sum4(const v_float32x4& a, const v_float32x4& b, + const v_float32x4& c, const v_float32x4& d) +{ +#if CV_SSE3 + __m128 ab = _mm_hadd_ps(a.val, b.val); + __m128 cd = _mm_hadd_ps(c.val, d.val); + return v_float32x4(_mm_hadd_ps(ab, cd)); +#else + __m128 ac = _mm_add_ps(_mm_unpacklo_ps(a.val, c.val), _mm_unpackhi_ps(a.val, c.val)); + __m128 bd = _mm_add_ps(_mm_unpacklo_ps(b.val, d.val), _mm_unpackhi_ps(b.val, d.val)); + return v_float32x4(_mm_add_ps(_mm_unpacklo_ps(ac, bd), _mm_unpackhi_ps(ac, bd))); +#endif +} + +OPENCV_HAL_IMPL_SSE_REDUCE_OP_4(v_uint32x4, unsigned, max, std::max) +OPENCV_HAL_IMPL_SSE_REDUCE_OP_4(v_uint32x4, unsigned, min, std::min) +OPENCV_HAL_IMPL_SSE_REDUCE_OP_4(v_int32x4, int, max, std::max) +OPENCV_HAL_IMPL_SSE_REDUCE_OP_4(v_int32x4, int, min, std::min) +OPENCV_HAL_IMPL_SSE_REDUCE_OP_4(v_float32x4, float, max, std::max) +OPENCV_HAL_IMPL_SSE_REDUCE_OP_4(v_float32x4, float, min, std::min) + +inline unsigned v_reduce_sad(const v_uint8x16& a, const v_uint8x16& b) +{ + __m128i half = _mm_sad_epu8(a.val, b.val); + return (unsigned)_mm_cvtsi128_si32(_mm_add_epi32(half, _mm_unpackhi_epi64(half, half))); +} +inline unsigned v_reduce_sad(const v_int8x16& a, const v_int8x16& b) +{ + __m128i half = _mm_set1_epi8(0x7f); + half = _mm_sad_epu8(_mm_add_epi8(a.val, half), _mm_add_epi8(b.val, half)); + return (unsigned)_mm_cvtsi128_si32(_mm_add_epi32(half, _mm_unpackhi_epi64(half, half))); +} +inline unsigned v_reduce_sad(const v_uint16x8& a, const v_uint16x8& b) +{ + v_uint32x4 l, h; + v_expand(v_absdiff(a, b), l, h); + return v_reduce_sum(l + h); +} +inline unsigned v_reduce_sad(const v_int16x8& a, const v_int16x8& b) +{ + v_uint32x4 l, h; + v_expand(v_absdiff(a, b), l, h); + return v_reduce_sum(l + h); +} +inline unsigned v_reduce_sad(const v_uint32x4& a, const v_uint32x4& b) +{ + return v_reduce_sum(v_absdiff(a, b)); +} +inline unsigned v_reduce_sad(const v_int32x4& a, const v_int32x4& b) +{ + return v_reduce_sum(v_absdiff(a, b)); +} +inline float v_reduce_sad(const v_float32x4& a, const v_float32x4& b) +{ + return v_reduce_sum(v_absdiff(a, b)); +} + +inline v_uint8x16 v_popcount(const v_uint8x16& a) +{ + __m128i m1 = _mm_set1_epi32(0x55555555); + __m128i m2 = _mm_set1_epi32(0x33333333); + __m128i m4 = _mm_set1_epi32(0x0f0f0f0f); + __m128i p = a.val; + p = _mm_add_epi32(_mm_and_si128(_mm_srli_epi32(p, 1), m1), _mm_and_si128(p, m1)); + p = _mm_add_epi32(_mm_and_si128(_mm_srli_epi32(p, 2), m2), _mm_and_si128(p, m2)); + p = _mm_add_epi32(_mm_and_si128(_mm_srli_epi32(p, 4), m4), _mm_and_si128(p, m4)); + return v_uint8x16(p); +} +inline v_uint16x8 v_popcount(const v_uint16x8& a) +{ + v_uint8x16 p = v_popcount(v_reinterpret_as_u8(a)); + p += v_rotate_right<1>(p); + return v_reinterpret_as_u16(p) & v_setall_u16(0x00ff); +} +inline v_uint32x4 v_popcount(const v_uint32x4& a) +{ + v_uint8x16 p = v_popcount(v_reinterpret_as_u8(a)); + p += v_rotate_right<1>(p); + p += v_rotate_right<2>(p); + return v_reinterpret_as_u32(p) & v_setall_u32(0x000000ff); +} +inline v_uint64x2 v_popcount(const v_uint64x2& a) +{ + return v_uint64x2(_mm_sad_epu8(v_popcount(v_reinterpret_as_u8(a)).val, _mm_setzero_si128())); +} +inline v_uint8x16 v_popcount(const v_int8x16& a) +{ return v_popcount(v_reinterpret_as_u8(a)); } +inline v_uint16x8 v_popcount(const v_int16x8& a) +{ return v_popcount(v_reinterpret_as_u16(a)); } +inline v_uint32x4 v_popcount(const v_int32x4& a) +{ return v_popcount(v_reinterpret_as_u32(a)); } +inline v_uint64x2 v_popcount(const v_int64x2& a) +{ return v_popcount(v_reinterpret_as_u64(a)); } + +#define OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(_Tpvec, suffix, cast_op, allmask) \ +inline int v_signmask(const _Tpvec& a) { return _mm_movemask_##suffix(cast_op(a.val)); } \ +inline bool v_check_all(const _Tpvec& a) { return _mm_movemask_##suffix(cast_op(a.val)) == allmask; } \ +inline bool v_check_any(const _Tpvec& a) { return _mm_movemask_##suffix(cast_op(a.val)) != 0; } +OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_uint8x16, epi8, OPENCV_HAL_NOP, 65535) +OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_int8x16, epi8, OPENCV_HAL_NOP, 65535) +OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_uint32x4, ps, _mm_castsi128_ps, 15) +OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_int32x4, ps, _mm_castsi128_ps, 15) +OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_uint64x2, pd, _mm_castsi128_pd, 3) +OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_int64x2, pd, _mm_castsi128_pd, 3) +OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_float32x4, ps, OPENCV_HAL_NOP, 15) +OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_float64x2, pd, OPENCV_HAL_NOP, 3) + +#define OPENCV_HAL_IMPL_SSE_CHECK_SIGNS_SHORT(_Tpvec) \ +inline int v_signmask(const _Tpvec& a) { return _mm_movemask_epi8(_mm_packs_epi16(a.val, a.val)) & 255; } \ +inline bool v_check_all(const _Tpvec& a) { return (_mm_movemask_epi8(a.val) & 0xaaaa) == 0xaaaa; } \ +inline bool v_check_any(const _Tpvec& a) { return (_mm_movemask_epi8(a.val) & 0xaaaa) != 0; } +OPENCV_HAL_IMPL_SSE_CHECK_SIGNS_SHORT(v_uint16x8) +OPENCV_HAL_IMPL_SSE_CHECK_SIGNS_SHORT(v_int16x8) + +inline int v_scan_forward(const v_int8x16& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))); } +inline int v_scan_forward(const v_uint8x16& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))); } +inline int v_scan_forward(const v_int16x8& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))) / 2; } +inline int v_scan_forward(const v_uint16x8& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))) / 2; } +inline int v_scan_forward(const v_int32x4& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))) / 4; } +inline int v_scan_forward(const v_uint32x4& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))) / 4; } +inline int v_scan_forward(const v_float32x4& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))) / 4; } +inline int v_scan_forward(const v_int64x2& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))) / 8; } +inline int v_scan_forward(const v_uint64x2& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))) / 8; } +inline int v_scan_forward(const v_float64x2& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))) / 8; } + +#if CV_SSE4_1 +#define OPENCV_HAL_IMPL_SSE_SELECT(_Tpvec, cast_ret, cast, suffix) \ +inline _Tpvec v_select(const _Tpvec& mask, const _Tpvec& a, const _Tpvec& b) \ +{ \ + return _Tpvec(cast_ret(_mm_blendv_##suffix(cast(b.val), cast(a.val), cast(mask.val)))); \ +} + +OPENCV_HAL_IMPL_SSE_SELECT(v_uint8x16, OPENCV_HAL_NOP, OPENCV_HAL_NOP, epi8) +OPENCV_HAL_IMPL_SSE_SELECT(v_int8x16, OPENCV_HAL_NOP, OPENCV_HAL_NOP, epi8) +OPENCV_HAL_IMPL_SSE_SELECT(v_uint16x8, OPENCV_HAL_NOP, OPENCV_HAL_NOP, epi8) +OPENCV_HAL_IMPL_SSE_SELECT(v_int16x8, OPENCV_HAL_NOP, OPENCV_HAL_NOP, epi8) +OPENCV_HAL_IMPL_SSE_SELECT(v_uint32x4, _mm_castps_si128, _mm_castsi128_ps, ps) +OPENCV_HAL_IMPL_SSE_SELECT(v_int32x4, _mm_castps_si128, _mm_castsi128_ps, ps) +// OPENCV_HAL_IMPL_SSE_SELECT(v_uint64x2, TBD, TBD, pd) +// OPENCV_HAL_IMPL_SSE_SELECT(v_int64x2, TBD, TBD, ps) +OPENCV_HAL_IMPL_SSE_SELECT(v_float32x4, OPENCV_HAL_NOP, OPENCV_HAL_NOP, ps) +OPENCV_HAL_IMPL_SSE_SELECT(v_float64x2, OPENCV_HAL_NOP, OPENCV_HAL_NOP, pd) + +#else // CV_SSE4_1 + +#define OPENCV_HAL_IMPL_SSE_SELECT(_Tpvec, suffix) \ +inline _Tpvec v_select(const _Tpvec& mask, const _Tpvec& a, const _Tpvec& b) \ +{ \ + return _Tpvec(_mm_xor_##suffix(b.val, _mm_and_##suffix(_mm_xor_##suffix(b.val, a.val), mask.val))); \ +} + +OPENCV_HAL_IMPL_SSE_SELECT(v_uint8x16, si128) +OPENCV_HAL_IMPL_SSE_SELECT(v_int8x16, si128) +OPENCV_HAL_IMPL_SSE_SELECT(v_uint16x8, si128) +OPENCV_HAL_IMPL_SSE_SELECT(v_int16x8, si128) +OPENCV_HAL_IMPL_SSE_SELECT(v_uint32x4, si128) +OPENCV_HAL_IMPL_SSE_SELECT(v_int32x4, si128) +// OPENCV_HAL_IMPL_SSE_SELECT(v_uint64x2, si128) +// OPENCV_HAL_IMPL_SSE_SELECT(v_int64x2, si128) +OPENCV_HAL_IMPL_SSE_SELECT(v_float32x4, ps) +OPENCV_HAL_IMPL_SSE_SELECT(v_float64x2, pd) +#endif + +/* Expand */ +#define OPENCV_HAL_IMPL_SSE_EXPAND(_Tpvec, _Tpwvec, _Tp, intrin) \ + inline void v_expand(const _Tpvec& a, _Tpwvec& b0, _Tpwvec& b1) \ + { \ + b0.val = intrin(a.val); \ + b1.val = __CV_CAT(intrin, _high)(a.val); \ + } \ + inline _Tpwvec v_expand_low(const _Tpvec& a) \ + { return _Tpwvec(intrin(a.val)); } \ + inline _Tpwvec v_expand_high(const _Tpvec& a) \ + { return _Tpwvec(__CV_CAT(intrin, _high)(a.val)); } \ + inline _Tpwvec v_load_expand(const _Tp* ptr) \ + { \ + __m128i a = _mm_loadl_epi64((const __m128i*)ptr); \ + return _Tpwvec(intrin(a)); \ + } + +OPENCV_HAL_IMPL_SSE_EXPAND(v_uint8x16, v_uint16x8, uchar, _v128_cvtepu8_epi16) +OPENCV_HAL_IMPL_SSE_EXPAND(v_int8x16, v_int16x8, schar, _v128_cvtepi8_epi16) +OPENCV_HAL_IMPL_SSE_EXPAND(v_uint16x8, v_uint32x4, ushort, _v128_cvtepu16_epi32) +OPENCV_HAL_IMPL_SSE_EXPAND(v_int16x8, v_int32x4, short, _v128_cvtepi16_epi32) +OPENCV_HAL_IMPL_SSE_EXPAND(v_uint32x4, v_uint64x2, unsigned, _v128_cvtepu32_epi64) +OPENCV_HAL_IMPL_SSE_EXPAND(v_int32x4, v_int64x2, int, _v128_cvtepi32_epi64) + +#define OPENCV_HAL_IMPL_SSE_EXPAND_Q(_Tpvec, _Tp, intrin) \ + inline _Tpvec v_load_expand_q(const _Tp* ptr) \ + { \ + __m128i a = _mm_cvtsi32_si128(*(const int*)ptr); \ + return _Tpvec(intrin(a)); \ + } + +OPENCV_HAL_IMPL_SSE_EXPAND_Q(v_uint32x4, uchar, _v128_cvtepu8_epi32) +OPENCV_HAL_IMPL_SSE_EXPAND_Q(v_int32x4, schar, _v128_cvtepi8_epi32) + +#define OPENCV_HAL_IMPL_SSE_UNPACKS(_Tpvec, suffix, cast_from, cast_to) \ +inline void v_zip(const _Tpvec& a0, const _Tpvec& a1, _Tpvec& b0, _Tpvec& b1) \ +{ \ + b0.val = _mm_unpacklo_##suffix(a0.val, a1.val); \ + b1.val = _mm_unpackhi_##suffix(a0.val, a1.val); \ +} \ +inline _Tpvec v_combine_low(const _Tpvec& a, const _Tpvec& b) \ +{ \ + __m128i a1 = cast_from(a.val), b1 = cast_from(b.val); \ + return _Tpvec(cast_to(_mm_unpacklo_epi64(a1, b1))); \ +} \ +inline _Tpvec v_combine_high(const _Tpvec& a, const _Tpvec& b) \ +{ \ + __m128i a1 = cast_from(a.val), b1 = cast_from(b.val); \ + return _Tpvec(cast_to(_mm_unpackhi_epi64(a1, b1))); \ +} \ +inline void v_recombine(const _Tpvec& a, const _Tpvec& b, _Tpvec& c, _Tpvec& d) \ +{ \ + __m128i a1 = cast_from(a.val), b1 = cast_from(b.val); \ + c.val = cast_to(_mm_unpacklo_epi64(a1, b1)); \ + d.val = cast_to(_mm_unpackhi_epi64(a1, b1)); \ +} + +OPENCV_HAL_IMPL_SSE_UNPACKS(v_uint8x16, epi8, OPENCV_HAL_NOP, OPENCV_HAL_NOP) +OPENCV_HAL_IMPL_SSE_UNPACKS(v_int8x16, epi8, OPENCV_HAL_NOP, OPENCV_HAL_NOP) +OPENCV_HAL_IMPL_SSE_UNPACKS(v_uint16x8, epi16, OPENCV_HAL_NOP, OPENCV_HAL_NOP) +OPENCV_HAL_IMPL_SSE_UNPACKS(v_int16x8, epi16, OPENCV_HAL_NOP, OPENCV_HAL_NOP) +OPENCV_HAL_IMPL_SSE_UNPACKS(v_uint32x4, epi32, OPENCV_HAL_NOP, OPENCV_HAL_NOP) +OPENCV_HAL_IMPL_SSE_UNPACKS(v_int32x4, epi32, OPENCV_HAL_NOP, OPENCV_HAL_NOP) +OPENCV_HAL_IMPL_SSE_UNPACKS(v_float32x4, ps, _mm_castps_si128, _mm_castsi128_ps) +OPENCV_HAL_IMPL_SSE_UNPACKS(v_float64x2, pd, _mm_castpd_si128, _mm_castsi128_pd) + +inline v_uint8x16 v_reverse(const v_uint8x16 &a) +{ +#if CV_SSSE3 + static const __m128i perm = _mm_setr_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + return v_uint8x16(_mm_shuffle_epi8(a.val, perm)); +#else + uchar CV_DECL_ALIGNED(32) d[16]; + v_store_aligned(d, a); + return v_uint8x16(d[15], d[14], d[13], d[12], d[11], d[10], d[9], d[8], d[7], d[6], d[5], d[4], d[3], d[2], d[1], d[0]); +#endif +} + +inline v_int8x16 v_reverse(const v_int8x16 &a) +{ return v_reinterpret_as_s8(v_reverse(v_reinterpret_as_u8(a))); } + +inline v_uint16x8 v_reverse(const v_uint16x8 &a) +{ +#if CV_SSSE3 + static const __m128i perm = _mm_setr_epi8(14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1); + return v_uint16x8(_mm_shuffle_epi8(a.val, perm)); +#else + __m128i r = _mm_shuffle_epi32(a.val, _MM_SHUFFLE(0, 1, 2, 3)); + r = _mm_shufflelo_epi16(r, _MM_SHUFFLE(2, 3, 0, 1)); + r = _mm_shufflehi_epi16(r, _MM_SHUFFLE(2, 3, 0, 1)); + return v_uint16x8(r); +#endif +} + +inline v_int16x8 v_reverse(const v_int16x8 &a) +{ return v_reinterpret_as_s16(v_reverse(v_reinterpret_as_u16(a))); } + +inline v_uint32x4 v_reverse(const v_uint32x4 &a) +{ + return v_uint32x4(_mm_shuffle_epi32(a.val, _MM_SHUFFLE(0, 1, 2, 3))); +} + +inline v_int32x4 v_reverse(const v_int32x4 &a) +{ return v_reinterpret_as_s32(v_reverse(v_reinterpret_as_u32(a))); } + +inline v_float32x4 v_reverse(const v_float32x4 &a) +{ return v_reinterpret_as_f32(v_reverse(v_reinterpret_as_u32(a))); } + +inline v_uint64x2 v_reverse(const v_uint64x2 &a) +{ + return v_uint64x2(_mm_shuffle_epi32(a.val, _MM_SHUFFLE(1, 0, 3, 2))); +} + +inline v_int64x2 v_reverse(const v_int64x2 &a) +{ return v_reinterpret_as_s64(v_reverse(v_reinterpret_as_u64(a))); } + +inline v_float64x2 v_reverse(const v_float64x2 &a) +{ return v_reinterpret_as_f64(v_reverse(v_reinterpret_as_u64(a))); } + +template +inline _Tpvec v_extract(const _Tpvec& a, const _Tpvec& b) +{ + return v_rotate_right(a, b); +} + +inline v_int32x4 v_round(const v_float32x4& a) +{ return v_int32x4(_mm_cvtps_epi32(a.val)); } + +inline v_int32x4 v_floor(const v_float32x4& a) +{ + __m128i a1 = _mm_cvtps_epi32(a.val); + __m128i mask = _mm_castps_si128(_mm_cmpgt_ps(_mm_cvtepi32_ps(a1), a.val)); + return v_int32x4(_mm_add_epi32(a1, mask)); +} + +inline v_int32x4 v_ceil(const v_float32x4& a) +{ + __m128i a1 = _mm_cvtps_epi32(a.val); + __m128i mask = _mm_castps_si128(_mm_cmpgt_ps(a.val, _mm_cvtepi32_ps(a1))); + return v_int32x4(_mm_sub_epi32(a1, mask)); +} + +inline v_int32x4 v_trunc(const v_float32x4& a) +{ return v_int32x4(_mm_cvttps_epi32(a.val)); } + +inline v_int32x4 v_round(const v_float64x2& a) +{ return v_int32x4(_mm_cvtpd_epi32(a.val)); } + +inline v_int32x4 v_round(const v_float64x2& a, const v_float64x2& b) +{ + __m128i ai = _mm_cvtpd_epi32(a.val), bi = _mm_cvtpd_epi32(b.val); + return v_int32x4(_mm_unpacklo_epi64(ai, bi)); +} + +inline v_int32x4 v_floor(const v_float64x2& a) +{ + __m128i a1 = _mm_cvtpd_epi32(a.val); + __m128i mask = _mm_castpd_si128(_mm_cmpgt_pd(_mm_cvtepi32_pd(a1), a.val)); + mask = _mm_srli_si128(_mm_slli_si128(mask, 4), 8); // m0 m0 m1 m1 => m0 m1 0 0 + return v_int32x4(_mm_add_epi32(a1, mask)); +} + +inline v_int32x4 v_ceil(const v_float64x2& a) +{ + __m128i a1 = _mm_cvtpd_epi32(a.val); + __m128i mask = _mm_castpd_si128(_mm_cmpgt_pd(a.val, _mm_cvtepi32_pd(a1))); + mask = _mm_srli_si128(_mm_slli_si128(mask, 4), 8); // m0 m0 m1 m1 => m0 m1 0 0 + return v_int32x4(_mm_sub_epi32(a1, mask)); +} + +inline v_int32x4 v_trunc(const v_float64x2& a) +{ return v_int32x4(_mm_cvttpd_epi32(a.val)); } + +#define OPENCV_HAL_IMPL_SSE_TRANSPOSE4x4(_Tpvec, suffix, cast_from, cast_to) \ +inline void v_transpose4x4(const _Tpvec& a0, const _Tpvec& a1, \ + const _Tpvec& a2, const _Tpvec& a3, \ + _Tpvec& b0, _Tpvec& b1, \ + _Tpvec& b2, _Tpvec& b3) \ +{ \ + __m128i t0 = cast_from(_mm_unpacklo_##suffix(a0.val, a1.val)); \ + __m128i t1 = cast_from(_mm_unpacklo_##suffix(a2.val, a3.val)); \ + __m128i t2 = cast_from(_mm_unpackhi_##suffix(a0.val, a1.val)); \ + __m128i t3 = cast_from(_mm_unpackhi_##suffix(a2.val, a3.val)); \ +\ + b0.val = cast_to(_mm_unpacklo_epi64(t0, t1)); \ + b1.val = cast_to(_mm_unpackhi_epi64(t0, t1)); \ + b2.val = cast_to(_mm_unpacklo_epi64(t2, t3)); \ + b3.val = cast_to(_mm_unpackhi_epi64(t2, t3)); \ +} + +OPENCV_HAL_IMPL_SSE_TRANSPOSE4x4(v_uint32x4, epi32, OPENCV_HAL_NOP, OPENCV_HAL_NOP) +OPENCV_HAL_IMPL_SSE_TRANSPOSE4x4(v_int32x4, epi32, OPENCV_HAL_NOP, OPENCV_HAL_NOP) +OPENCV_HAL_IMPL_SSE_TRANSPOSE4x4(v_float32x4, ps, _mm_castps_si128, _mm_castsi128_ps) + +// load deinterleave +inline void v_load_deinterleave(const uchar* ptr, v_uint8x16& a, v_uint8x16& b) +{ + __m128i t00 = _mm_loadu_si128((const __m128i*)ptr); + __m128i t01 = _mm_loadu_si128((const __m128i*)(ptr + 16)); + + __m128i t10 = _mm_unpacklo_epi8(t00, t01); + __m128i t11 = _mm_unpackhi_epi8(t00, t01); + + __m128i t20 = _mm_unpacklo_epi8(t10, t11); + __m128i t21 = _mm_unpackhi_epi8(t10, t11); + + __m128i t30 = _mm_unpacklo_epi8(t20, t21); + __m128i t31 = _mm_unpackhi_epi8(t20, t21); + + a.val = _mm_unpacklo_epi8(t30, t31); + b.val = _mm_unpackhi_epi8(t30, t31); +} + +inline void v_load_deinterleave(const uchar* ptr, v_uint8x16& a, v_uint8x16& b, v_uint8x16& c) +{ +#if CV_SSE4_1 + const __m128i m0 = _mm_setr_epi8(0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0); + const __m128i m1 = _mm_setr_epi8(0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0); + __m128i s0 = _mm_loadu_si128((const __m128i*)ptr); + __m128i s1 = _mm_loadu_si128((const __m128i*)(ptr + 16)); + __m128i s2 = _mm_loadu_si128((const __m128i*)(ptr + 32)); + __m128i a0 = _mm_blendv_epi8(_mm_blendv_epi8(s0, s1, m0), s2, m1); + __m128i b0 = _mm_blendv_epi8(_mm_blendv_epi8(s1, s2, m0), s0, m1); + __m128i c0 = _mm_blendv_epi8(_mm_blendv_epi8(s2, s0, m0), s1, m1); + const __m128i sh_b = _mm_setr_epi8(0, 3, 6, 9, 12, 15, 2, 5, 8, 11, 14, 1, 4, 7, 10, 13); + const __m128i sh_g = _mm_setr_epi8(1, 4, 7, 10, 13, 0, 3, 6, 9, 12, 15, 2, 5, 8, 11, 14); + const __m128i sh_r = _mm_setr_epi8(2, 5, 8, 11, 14, 1, 4, 7, 10, 13, 0, 3, 6, 9, 12, 15); + a0 = _mm_shuffle_epi8(a0, sh_b); + b0 = _mm_shuffle_epi8(b0, sh_g); + c0 = _mm_shuffle_epi8(c0, sh_r); + a.val = a0; + b.val = b0; + c.val = c0; +#elif CV_SSSE3 + const __m128i m0 = _mm_setr_epi8(0, 3, 6, 9, 12, 15, 1, 4, 7, 10, 13, 2, 5, 8, 11, 14); + const __m128i m1 = _mm_alignr_epi8(m0, m0, 11); + const __m128i m2 = _mm_alignr_epi8(m0, m0, 6); + + __m128i t0 = _mm_loadu_si128((const __m128i*)ptr); + __m128i t1 = _mm_loadu_si128((const __m128i*)(ptr + 16)); + __m128i t2 = _mm_loadu_si128((const __m128i*)(ptr + 32)); + + __m128i s0 = _mm_shuffle_epi8(t0, m0); + __m128i s1 = _mm_shuffle_epi8(t1, m1); + __m128i s2 = _mm_shuffle_epi8(t2, m2); + + t0 = _mm_alignr_epi8(s1, _mm_slli_si128(s0, 10), 5); + a.val = _mm_alignr_epi8(s2, t0, 5); + + t1 = _mm_alignr_epi8(_mm_srli_si128(s1, 5), _mm_slli_si128(s0, 5), 6); + b.val = _mm_alignr_epi8(_mm_srli_si128(s2, 5), t1, 5); + + t2 = _mm_alignr_epi8(_mm_srli_si128(s2, 10), s1, 11); + c.val = _mm_alignr_epi8(t2, s0, 11); +#else + __m128i t00 = _mm_loadu_si128((const __m128i*)ptr); + __m128i t01 = _mm_loadu_si128((const __m128i*)(ptr + 16)); + __m128i t02 = _mm_loadu_si128((const __m128i*)(ptr + 32)); + + __m128i t10 = _mm_unpacklo_epi8(t00, _mm_unpackhi_epi64(t01, t01)); + __m128i t11 = _mm_unpacklo_epi8(_mm_unpackhi_epi64(t00, t00), t02); + __m128i t12 = _mm_unpacklo_epi8(t01, _mm_unpackhi_epi64(t02, t02)); + + __m128i t20 = _mm_unpacklo_epi8(t10, _mm_unpackhi_epi64(t11, t11)); + __m128i t21 = _mm_unpacklo_epi8(_mm_unpackhi_epi64(t10, t10), t12); + __m128i t22 = _mm_unpacklo_epi8(t11, _mm_unpackhi_epi64(t12, t12)); + + __m128i t30 = _mm_unpacklo_epi8(t20, _mm_unpackhi_epi64(t21, t21)); + __m128i t31 = _mm_unpacklo_epi8(_mm_unpackhi_epi64(t20, t20), t22); + __m128i t32 = _mm_unpacklo_epi8(t21, _mm_unpackhi_epi64(t22, t22)); + + a.val = _mm_unpacklo_epi8(t30, _mm_unpackhi_epi64(t31, t31)); + b.val = _mm_unpacklo_epi8(_mm_unpackhi_epi64(t30, t30), t32); + c.val = _mm_unpacklo_epi8(t31, _mm_unpackhi_epi64(t32, t32)); +#endif +} + +inline void v_load_deinterleave(const uchar* ptr, v_uint8x16& a, v_uint8x16& b, v_uint8x16& c, v_uint8x16& d) +{ + __m128i u0 = _mm_loadu_si128((const __m128i*)ptr); // a0 b0 c0 d0 a1 b1 c1 d1 ... + __m128i u1 = _mm_loadu_si128((const __m128i*)(ptr + 16)); // a4 b4 c4 d4 ... + __m128i u2 = _mm_loadu_si128((const __m128i*)(ptr + 32)); // a8 b8 c8 d8 ... + __m128i u3 = _mm_loadu_si128((const __m128i*)(ptr + 48)); // a12 b12 c12 d12 ... + + __m128i v0 = _mm_unpacklo_epi8(u0, u2); // a0 a8 b0 b8 ... + __m128i v1 = _mm_unpackhi_epi8(u0, u2); // a2 a10 b2 b10 ... + __m128i v2 = _mm_unpacklo_epi8(u1, u3); // a4 a12 b4 b12 ... + __m128i v3 = _mm_unpackhi_epi8(u1, u3); // a6 a14 b6 b14 ... + + u0 = _mm_unpacklo_epi8(v0, v2); // a0 a4 a8 a12 ... + u1 = _mm_unpacklo_epi8(v1, v3); // a2 a6 a10 a14 ... + u2 = _mm_unpackhi_epi8(v0, v2); // a1 a5 a9 a13 ... + u3 = _mm_unpackhi_epi8(v1, v3); // a3 a7 a11 a15 ... + + v0 = _mm_unpacklo_epi8(u0, u1); // a0 a2 a4 a6 ... + v1 = _mm_unpacklo_epi8(u2, u3); // a1 a3 a5 a7 ... + v2 = _mm_unpackhi_epi8(u0, u1); // c0 c2 c4 c6 ... + v3 = _mm_unpackhi_epi8(u2, u3); // c1 c3 c5 c7 ... + + a.val = _mm_unpacklo_epi8(v0, v1); + b.val = _mm_unpackhi_epi8(v0, v1); + c.val = _mm_unpacklo_epi8(v2, v3); + d.val = _mm_unpackhi_epi8(v2, v3); +} + +inline void v_load_deinterleave(const ushort* ptr, v_uint16x8& a, v_uint16x8& b) +{ + __m128i v0 = _mm_loadu_si128((__m128i*)(ptr)); // a0 b0 a1 b1 a2 b2 a3 b3 + __m128i v1 = _mm_loadu_si128((__m128i*)(ptr + 8)); // a4 b4 a5 b5 a6 b6 a7 b7 + + __m128i v2 = _mm_unpacklo_epi16(v0, v1); // a0 a4 b0 b4 a1 a5 b1 b5 + __m128i v3 = _mm_unpackhi_epi16(v0, v1); // a2 a6 b2 b6 a3 a7 b3 b7 + __m128i v4 = _mm_unpacklo_epi16(v2, v3); // a0 a2 a4 a6 b0 b2 b4 b6 + __m128i v5 = _mm_unpackhi_epi16(v2, v3); // a1 a3 a5 a7 b1 b3 b5 b7 + + a.val = _mm_unpacklo_epi16(v4, v5); // a0 a1 a2 a3 a4 a5 a6 a7 + b.val = _mm_unpackhi_epi16(v4, v5); // b0 b1 ab b3 b4 b5 b6 b7 +} + +inline void v_load_deinterleave(const ushort* ptr, v_uint16x8& a, v_uint16x8& b, v_uint16x8& c) +{ +#if CV_SSE4_1 + __m128i v0 = _mm_loadu_si128((__m128i*)(ptr)); + __m128i v1 = _mm_loadu_si128((__m128i*)(ptr + 8)); + __m128i v2 = _mm_loadu_si128((__m128i*)(ptr + 16)); + __m128i a0 = _mm_blend_epi16(_mm_blend_epi16(v0, v1, 0x92), v2, 0x24); + __m128i b0 = _mm_blend_epi16(_mm_blend_epi16(v2, v0, 0x92), v1, 0x24); + __m128i c0 = _mm_blend_epi16(_mm_blend_epi16(v1, v2, 0x92), v0, 0x24); + + const __m128i sh_a = _mm_setr_epi8(0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15, 4, 5, 10, 11); + const __m128i sh_b = _mm_setr_epi8(2, 3, 8, 9, 14, 15, 4, 5, 10, 11, 0, 1, 6, 7, 12, 13); + const __m128i sh_c = _mm_setr_epi8(4, 5, 10, 11, 0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15); + a0 = _mm_shuffle_epi8(a0, sh_a); + b0 = _mm_shuffle_epi8(b0, sh_b); + c0 = _mm_shuffle_epi8(c0, sh_c); + + a.val = a0; + b.val = b0; + c.val = c0; +#else + __m128i t00 = _mm_loadu_si128((const __m128i*)ptr); + __m128i t01 = _mm_loadu_si128((const __m128i*)(ptr + 8)); + __m128i t02 = _mm_loadu_si128((const __m128i*)(ptr + 16)); + + __m128i t10 = _mm_unpacklo_epi16(t00, _mm_unpackhi_epi64(t01, t01)); + __m128i t11 = _mm_unpacklo_epi16(_mm_unpackhi_epi64(t00, t00), t02); + __m128i t12 = _mm_unpacklo_epi16(t01, _mm_unpackhi_epi64(t02, t02)); + + __m128i t20 = _mm_unpacklo_epi16(t10, _mm_unpackhi_epi64(t11, t11)); + __m128i t21 = _mm_unpacklo_epi16(_mm_unpackhi_epi64(t10, t10), t12); + __m128i t22 = _mm_unpacklo_epi16(t11, _mm_unpackhi_epi64(t12, t12)); + + a.val = _mm_unpacklo_epi16(t20, _mm_unpackhi_epi64(t21, t21)); + b.val = _mm_unpacklo_epi16(_mm_unpackhi_epi64(t20, t20), t22); + c.val = _mm_unpacklo_epi16(t21, _mm_unpackhi_epi64(t22, t22)); +#endif +} + +inline void v_load_deinterleave(const ushort* ptr, v_uint16x8& a, v_uint16x8& b, v_uint16x8& c, v_uint16x8& d) +{ + __m128i u0 = _mm_loadu_si128((const __m128i*)ptr); // a0 b0 c0 d0 a1 b1 c1 d1 + __m128i u1 = _mm_loadu_si128((const __m128i*)(ptr + 8)); // a2 b2 c2 d2 ... + __m128i u2 = _mm_loadu_si128((const __m128i*)(ptr + 16)); // a4 b4 c4 d4 ... + __m128i u3 = _mm_loadu_si128((const __m128i*)(ptr + 24)); // a6 b6 c6 d6 ... + + __m128i v0 = _mm_unpacklo_epi16(u0, u2); // a0 a4 b0 b4 ... + __m128i v1 = _mm_unpackhi_epi16(u0, u2); // a1 a5 b1 b5 ... + __m128i v2 = _mm_unpacklo_epi16(u1, u3); // a2 a6 b2 b6 ... + __m128i v3 = _mm_unpackhi_epi16(u1, u3); // a3 a7 b3 b7 ... + + u0 = _mm_unpacklo_epi16(v0, v2); // a0 a2 a4 a6 ... + u1 = _mm_unpacklo_epi16(v1, v3); // a1 a3 a5 a7 ... + u2 = _mm_unpackhi_epi16(v0, v2); // c0 c2 c4 c6 ... + u3 = _mm_unpackhi_epi16(v1, v3); // c1 c3 c5 c7 ... + + a.val = _mm_unpacklo_epi16(u0, u1); + b.val = _mm_unpackhi_epi16(u0, u1); + c.val = _mm_unpacklo_epi16(u2, u3); + d.val = _mm_unpackhi_epi16(u2, u3); +} + +inline void v_load_deinterleave(const unsigned* ptr, v_uint32x4& a, v_uint32x4& b) +{ + __m128i v0 = _mm_loadu_si128((__m128i*)(ptr)); // a0 b0 a1 b1 + __m128i v1 = _mm_loadu_si128((__m128i*)(ptr + 4)); // a2 b2 a3 b3 + + __m128i v2 = _mm_unpacklo_epi32(v0, v1); // a0 a2 b0 b2 + __m128i v3 = _mm_unpackhi_epi32(v0, v1); // a1 a3 b1 b3 + + a.val = _mm_unpacklo_epi32(v2, v3); // a0 a1 a2 a3 + b.val = _mm_unpackhi_epi32(v2, v3); // b0 b1 ab b3 +} + +inline void v_load_deinterleave(const unsigned* ptr, v_uint32x4& a, v_uint32x4& b, v_uint32x4& c) +{ + __m128i t00 = _mm_loadu_si128((const __m128i*)ptr); + __m128i t01 = _mm_loadu_si128((const __m128i*)(ptr + 4)); + __m128i t02 = _mm_loadu_si128((const __m128i*)(ptr + 8)); + + __m128i t10 = _mm_unpacklo_epi32(t00, _mm_unpackhi_epi64(t01, t01)); + __m128i t11 = _mm_unpacklo_epi32(_mm_unpackhi_epi64(t00, t00), t02); + __m128i t12 = _mm_unpacklo_epi32(t01, _mm_unpackhi_epi64(t02, t02)); + + a.val = _mm_unpacklo_epi32(t10, _mm_unpackhi_epi64(t11, t11)); + b.val = _mm_unpacklo_epi32(_mm_unpackhi_epi64(t10, t10), t12); + c.val = _mm_unpacklo_epi32(t11, _mm_unpackhi_epi64(t12, t12)); +} + +inline void v_load_deinterleave(const unsigned* ptr, v_uint32x4& a, v_uint32x4& b, v_uint32x4& c, v_uint32x4& d) +{ + v_uint32x4 s0(_mm_loadu_si128((const __m128i*)ptr)); // a0 b0 c0 d0 + v_uint32x4 s1(_mm_loadu_si128((const __m128i*)(ptr + 4))); // a1 b1 c1 d1 + v_uint32x4 s2(_mm_loadu_si128((const __m128i*)(ptr + 8))); // a2 b2 c2 d2 + v_uint32x4 s3(_mm_loadu_si128((const __m128i*)(ptr + 12))); // a3 b3 c3 d3 + + v_transpose4x4(s0, s1, s2, s3, a, b, c, d); +} + +inline void v_load_deinterleave(const float* ptr, v_float32x4& a, v_float32x4& b) +{ + __m128 u0 = _mm_loadu_ps(ptr); // a0 b0 a1 b1 + __m128 u1 = _mm_loadu_ps((ptr + 4)); // a2 b2 a3 b3 + + a.val = _mm_shuffle_ps(u0, u1, _MM_SHUFFLE(2, 0, 2, 0)); // a0 a1 a2 a3 + b.val = _mm_shuffle_ps(u0, u1, _MM_SHUFFLE(3, 1, 3, 1)); // b0 b1 ab b3 +} + +inline void v_load_deinterleave(const float* ptr, v_float32x4& a, v_float32x4& b, v_float32x4& c) +{ + __m128 t0 = _mm_loadu_ps(ptr + 0); + __m128 t1 = _mm_loadu_ps(ptr + 4); + __m128 t2 = _mm_loadu_ps(ptr + 8); + + __m128 at12 = _mm_shuffle_ps(t1, t2, _MM_SHUFFLE(0, 1, 0, 2)); + a.val = _mm_shuffle_ps(t0, at12, _MM_SHUFFLE(2, 0, 3, 0)); + + __m128 bt01 = _mm_shuffle_ps(t0, t1, _MM_SHUFFLE(0, 0, 0, 1)); + __m128 bt12 = _mm_shuffle_ps(t1, t2, _MM_SHUFFLE(0, 2, 0, 3)); + b.val = _mm_shuffle_ps(bt01, bt12, _MM_SHUFFLE(2, 0, 2, 0)); + + __m128 ct01 = _mm_shuffle_ps(t0, t1, _MM_SHUFFLE(0, 1, 0, 2)); + c.val = _mm_shuffle_ps(ct01, t2, _MM_SHUFFLE(3, 0, 2, 0)); +} + +inline void v_load_deinterleave(const float* ptr, v_float32x4& a, v_float32x4& b, v_float32x4& c, v_float32x4& d) +{ + __m128 t0 = _mm_loadu_ps(ptr + 0); + __m128 t1 = _mm_loadu_ps(ptr + 4); + __m128 t2 = _mm_loadu_ps(ptr + 8); + __m128 t3 = _mm_loadu_ps(ptr + 12); + __m128 t02lo = _mm_unpacklo_ps(t0, t2); + __m128 t13lo = _mm_unpacklo_ps(t1, t3); + __m128 t02hi = _mm_unpackhi_ps(t0, t2); + __m128 t13hi = _mm_unpackhi_ps(t1, t3); + a.val = _mm_unpacklo_ps(t02lo, t13lo); + b.val = _mm_unpackhi_ps(t02lo, t13lo); + c.val = _mm_unpacklo_ps(t02hi, t13hi); + d.val = _mm_unpackhi_ps(t02hi, t13hi); +} + +inline void v_load_deinterleave(const uint64 *ptr, v_uint64x2& a, v_uint64x2& b) +{ + __m128i t0 = _mm_loadu_si128((const __m128i*)ptr); + __m128i t1 = _mm_loadu_si128((const __m128i*)(ptr + 2)); + + a = v_uint64x2(_mm_unpacklo_epi64(t0, t1)); + b = v_uint64x2(_mm_unpackhi_epi64(t0, t1)); +} + +inline void v_load_deinterleave(const uint64 *ptr, v_uint64x2& a, v_uint64x2& b, v_uint64x2& c) +{ + __m128i t0 = _mm_loadu_si128((const __m128i*)ptr); // a0, b0 + __m128i t1 = _mm_loadu_si128((const __m128i*)(ptr + 2)); // c0, a1 + __m128i t2 = _mm_loadu_si128((const __m128i*)(ptr + 4)); // b1, c1 + + t1 = _mm_shuffle_epi32(t1, 0x4e); // a1, c0 + + a = v_uint64x2(_mm_unpacklo_epi64(t0, t1)); + b = v_uint64x2(_mm_unpacklo_epi64(_mm_unpackhi_epi64(t0, t0), t2)); + c = v_uint64x2(_mm_unpackhi_epi64(t1, t2)); +} + +inline void v_load_deinterleave(const uint64 *ptr, v_uint64x2& a, + v_uint64x2& b, v_uint64x2& c, v_uint64x2& d) +{ + __m128i t0 = _mm_loadu_si128((const __m128i*)ptr); // a0 b0 + __m128i t1 = _mm_loadu_si128((const __m128i*)(ptr + 2)); // c0 d0 + __m128i t2 = _mm_loadu_si128((const __m128i*)(ptr + 4)); // a1 b1 + __m128i t3 = _mm_loadu_si128((const __m128i*)(ptr + 6)); // c1 d1 + + a = v_uint64x2(_mm_unpacklo_epi64(t0, t2)); + b = v_uint64x2(_mm_unpackhi_epi64(t0, t2)); + c = v_uint64x2(_mm_unpacklo_epi64(t1, t3)); + d = v_uint64x2(_mm_unpackhi_epi64(t1, t3)); +} + +// store interleave + +inline void v_store_interleave( uchar* ptr, const v_uint8x16& a, const v_uint8x16& b, + hal::StoreMode mode = hal::STORE_UNALIGNED) +{ + __m128i v0 = _mm_unpacklo_epi8(a.val, b.val); + __m128i v1 = _mm_unpackhi_epi8(a.val, b.val); + + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm_stream_si128((__m128i*)(ptr), v0); + _mm_stream_si128((__m128i*)(ptr + 16), v1); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm_store_si128((__m128i*)(ptr), v0); + _mm_store_si128((__m128i*)(ptr + 16), v1); + } + else + { + _mm_storeu_si128((__m128i*)(ptr), v0); + _mm_storeu_si128((__m128i*)(ptr + 16), v1); + } +} + +inline void v_store_interleave( uchar* ptr, const v_uint8x16& a, const v_uint8x16& b, + const v_uint8x16& c, hal::StoreMode mode = hal::STORE_UNALIGNED) +{ +#if CV_SSE4_1 + const __m128i sh_a = _mm_setr_epi8(0, 11, 6, 1, 12, 7, 2, 13, 8, 3, 14, 9, 4, 15, 10, 5); + const __m128i sh_b = _mm_setr_epi8(5, 0, 11, 6, 1, 12, 7, 2, 13, 8, 3, 14, 9, 4, 15, 10); + const __m128i sh_c = _mm_setr_epi8(10, 5, 0, 11, 6, 1, 12, 7, 2, 13, 8, 3, 14, 9, 4, 15); + __m128i a0 = _mm_shuffle_epi8(a.val, sh_a); + __m128i b0 = _mm_shuffle_epi8(b.val, sh_b); + __m128i c0 = _mm_shuffle_epi8(c.val, sh_c); + + const __m128i m0 = _mm_setr_epi8(0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0); + const __m128i m1 = _mm_setr_epi8(0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0); + __m128i v0 = _mm_blendv_epi8(_mm_blendv_epi8(a0, b0, m1), c0, m0); + __m128i v1 = _mm_blendv_epi8(_mm_blendv_epi8(b0, c0, m1), a0, m0); + __m128i v2 = _mm_blendv_epi8(_mm_blendv_epi8(c0, a0, m1), b0, m0); +#elif CV_SSSE3 + const __m128i m0 = _mm_setr_epi8(0, 6, 11, 1, 7, 12, 2, 8, 13, 3, 9, 14, 4, 10, 15, 5); + const __m128i m1 = _mm_setr_epi8(5, 11, 0, 6, 12, 1, 7, 13, 2, 8, 14, 3, 9, 15, 4, 10); + const __m128i m2 = _mm_setr_epi8(10, 0, 5, 11, 1, 6, 12, 2, 7, 13, 3, 8, 14, 4, 9, 15); + + __m128i t0 = _mm_alignr_epi8(b.val, _mm_slli_si128(a.val, 10), 5); + t0 = _mm_alignr_epi8(c.val, t0, 5); + __m128i v0 = _mm_shuffle_epi8(t0, m0); + + __m128i t1 = _mm_alignr_epi8(_mm_srli_si128(b.val, 5), _mm_slli_si128(a.val, 5), 6); + t1 = _mm_alignr_epi8(_mm_srli_si128(c.val, 5), t1, 5); + __m128i v1 = _mm_shuffle_epi8(t1, m1); + + __m128i t2 = _mm_alignr_epi8(_mm_srli_si128(c.val, 10), b.val, 11); + t2 = _mm_alignr_epi8(t2, a.val, 11); + __m128i v2 = _mm_shuffle_epi8(t2, m2); +#else + __m128i z = _mm_setzero_si128(); + __m128i ab0 = _mm_unpacklo_epi8(a.val, b.val); + __m128i ab1 = _mm_unpackhi_epi8(a.val, b.val); + __m128i c0 = _mm_unpacklo_epi8(c.val, z); + __m128i c1 = _mm_unpackhi_epi8(c.val, z); + + __m128i p00 = _mm_unpacklo_epi16(ab0, c0); + __m128i p01 = _mm_unpackhi_epi16(ab0, c0); + __m128i p02 = _mm_unpacklo_epi16(ab1, c1); + __m128i p03 = _mm_unpackhi_epi16(ab1, c1); + + __m128i p10 = _mm_unpacklo_epi32(p00, p01); + __m128i p11 = _mm_unpackhi_epi32(p00, p01); + __m128i p12 = _mm_unpacklo_epi32(p02, p03); + __m128i p13 = _mm_unpackhi_epi32(p02, p03); + + __m128i p20 = _mm_unpacklo_epi64(p10, p11); + __m128i p21 = _mm_unpackhi_epi64(p10, p11); + __m128i p22 = _mm_unpacklo_epi64(p12, p13); + __m128i p23 = _mm_unpackhi_epi64(p12, p13); + + p20 = _mm_slli_si128(p20, 1); + p22 = _mm_slli_si128(p22, 1); + + __m128i p30 = _mm_slli_epi64(_mm_unpacklo_epi32(p20, p21), 8); + __m128i p31 = _mm_srli_epi64(_mm_unpackhi_epi32(p20, p21), 8); + __m128i p32 = _mm_slli_epi64(_mm_unpacklo_epi32(p22, p23), 8); + __m128i p33 = _mm_srli_epi64(_mm_unpackhi_epi32(p22, p23), 8); + + __m128i p40 = _mm_unpacklo_epi64(p30, p31); + __m128i p41 = _mm_unpackhi_epi64(p30, p31); + __m128i p42 = _mm_unpacklo_epi64(p32, p33); + __m128i p43 = _mm_unpackhi_epi64(p32, p33); + + __m128i v0 = _mm_or_si128(_mm_srli_si128(p40, 2), _mm_slli_si128(p41, 10)); + __m128i v1 = _mm_or_si128(_mm_srli_si128(p41, 6), _mm_slli_si128(p42, 6)); + __m128i v2 = _mm_or_si128(_mm_srli_si128(p42, 10), _mm_slli_si128(p43, 2)); +#endif + + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm_stream_si128((__m128i*)(ptr), v0); + _mm_stream_si128((__m128i*)(ptr + 16), v1); + _mm_stream_si128((__m128i*)(ptr + 32), v2); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm_store_si128((__m128i*)(ptr), v0); + _mm_store_si128((__m128i*)(ptr + 16), v1); + _mm_store_si128((__m128i*)(ptr + 32), v2); + } + else + { + _mm_storeu_si128((__m128i*)(ptr), v0); + _mm_storeu_si128((__m128i*)(ptr + 16), v1); + _mm_storeu_si128((__m128i*)(ptr + 32), v2); + } +} + +inline void v_store_interleave( uchar* ptr, const v_uint8x16& a, const v_uint8x16& b, + const v_uint8x16& c, const v_uint8x16& d, + hal::StoreMode mode = hal::STORE_UNALIGNED) +{ + // a0 a1 a2 a3 .... + // b0 b1 b2 b3 .... + // c0 c1 c2 c3 .... + // d0 d1 d2 d3 .... + __m128i u0 = _mm_unpacklo_epi8(a.val, c.val); // a0 c0 a1 c1 ... + __m128i u1 = _mm_unpackhi_epi8(a.val, c.val); // a8 c8 a9 c9 ... + __m128i u2 = _mm_unpacklo_epi8(b.val, d.val); // b0 d0 b1 d1 ... + __m128i u3 = _mm_unpackhi_epi8(b.val, d.val); // b8 d8 b9 d9 ... + + __m128i v0 = _mm_unpacklo_epi8(u0, u2); // a0 b0 c0 d0 ... + __m128i v1 = _mm_unpackhi_epi8(u0, u2); // a4 b4 c4 d4 ... + __m128i v2 = _mm_unpacklo_epi8(u1, u3); // a8 b8 c8 d8 ... + __m128i v3 = _mm_unpackhi_epi8(u1, u3); // a12 b12 c12 d12 ... + + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm_stream_si128((__m128i*)(ptr), v0); + _mm_stream_si128((__m128i*)(ptr + 16), v1); + _mm_stream_si128((__m128i*)(ptr + 32), v2); + _mm_stream_si128((__m128i*)(ptr + 48), v3); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm_store_si128((__m128i*)(ptr), v0); + _mm_store_si128((__m128i*)(ptr + 16), v1); + _mm_store_si128((__m128i*)(ptr + 32), v2); + _mm_store_si128((__m128i*)(ptr + 48), v3); + } + else + { + _mm_storeu_si128((__m128i*)(ptr), v0); + _mm_storeu_si128((__m128i*)(ptr + 16), v1); + _mm_storeu_si128((__m128i*)(ptr + 32), v2); + _mm_storeu_si128((__m128i*)(ptr + 48), v3); + } +} + +inline void v_store_interleave( ushort* ptr, const v_uint16x8& a, const v_uint16x8& b, + hal::StoreMode mode = hal::STORE_UNALIGNED) +{ + __m128i v0 = _mm_unpacklo_epi16(a.val, b.val); + __m128i v1 = _mm_unpackhi_epi16(a.val, b.val); + + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm_stream_si128((__m128i*)(ptr), v0); + _mm_stream_si128((__m128i*)(ptr + 8), v1); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm_store_si128((__m128i*)(ptr), v0); + _mm_store_si128((__m128i*)(ptr + 8), v1); + } + else + { + _mm_storeu_si128((__m128i*)(ptr), v0); + _mm_storeu_si128((__m128i*)(ptr + 8), v1); + } +} + +inline void v_store_interleave( ushort* ptr, const v_uint16x8& a, + const v_uint16x8& b, const v_uint16x8& c, + hal::StoreMode mode = hal::STORE_UNALIGNED) +{ +#if CV_SSE4_1 + const __m128i sh_a = _mm_setr_epi8(0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15, 4, 5, 10, 11); + const __m128i sh_b = _mm_setr_epi8(10, 11, 0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15, 4, 5); + const __m128i sh_c = _mm_setr_epi8(4, 5, 10, 11, 0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15); + __m128i a0 = _mm_shuffle_epi8(a.val, sh_a); + __m128i b0 = _mm_shuffle_epi8(b.val, sh_b); + __m128i c0 = _mm_shuffle_epi8(c.val, sh_c); + + __m128i v0 = _mm_blend_epi16(_mm_blend_epi16(a0, b0, 0x92), c0, 0x24); + __m128i v1 = _mm_blend_epi16(_mm_blend_epi16(c0, a0, 0x92), b0, 0x24); + __m128i v2 = _mm_blend_epi16(_mm_blend_epi16(b0, c0, 0x92), a0, 0x24); +#else + __m128i z = _mm_setzero_si128(); + __m128i ab0 = _mm_unpacklo_epi16(a.val, b.val); + __m128i ab1 = _mm_unpackhi_epi16(a.val, b.val); + __m128i c0 = _mm_unpacklo_epi16(c.val, z); + __m128i c1 = _mm_unpackhi_epi16(c.val, z); + + __m128i p10 = _mm_unpacklo_epi32(ab0, c0); + __m128i p11 = _mm_unpackhi_epi32(ab0, c0); + __m128i p12 = _mm_unpacklo_epi32(ab1, c1); + __m128i p13 = _mm_unpackhi_epi32(ab1, c1); + + __m128i p20 = _mm_unpacklo_epi64(p10, p11); + __m128i p21 = _mm_unpackhi_epi64(p10, p11); + __m128i p22 = _mm_unpacklo_epi64(p12, p13); + __m128i p23 = _mm_unpackhi_epi64(p12, p13); + + p20 = _mm_slli_si128(p20, 2); + p22 = _mm_slli_si128(p22, 2); + + __m128i p30 = _mm_unpacklo_epi64(p20, p21); + __m128i p31 = _mm_unpackhi_epi64(p20, p21); + __m128i p32 = _mm_unpacklo_epi64(p22, p23); + __m128i p33 = _mm_unpackhi_epi64(p22, p23); + + __m128i v0 = _mm_or_si128(_mm_srli_si128(p30, 2), _mm_slli_si128(p31, 10)); + __m128i v1 = _mm_or_si128(_mm_srli_si128(p31, 6), _mm_slli_si128(p32, 6)); + __m128i v2 = _mm_or_si128(_mm_srli_si128(p32, 10), _mm_slli_si128(p33, 2)); +#endif + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm_stream_si128((__m128i*)(ptr), v0); + _mm_stream_si128((__m128i*)(ptr + 8), v1); + _mm_stream_si128((__m128i*)(ptr + 16), v2); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm_store_si128((__m128i*)(ptr), v0); + _mm_store_si128((__m128i*)(ptr + 8), v1); + _mm_store_si128((__m128i*)(ptr + 16), v2); + } + else + { + _mm_storeu_si128((__m128i*)(ptr), v0); + _mm_storeu_si128((__m128i*)(ptr + 8), v1); + _mm_storeu_si128((__m128i*)(ptr + 16), v2); + } +} + +inline void v_store_interleave( ushort* ptr, const v_uint16x8& a, const v_uint16x8& b, + const v_uint16x8& c, const v_uint16x8& d, + hal::StoreMode mode = hal::STORE_UNALIGNED) +{ + // a0 a1 a2 a3 .... + // b0 b1 b2 b3 .... + // c0 c1 c2 c3 .... + // d0 d1 d2 d3 .... + __m128i u0 = _mm_unpacklo_epi16(a.val, c.val); // a0 c0 a1 c1 ... + __m128i u1 = _mm_unpackhi_epi16(a.val, c.val); // a4 c4 a5 c5 ... + __m128i u2 = _mm_unpacklo_epi16(b.val, d.val); // b0 d0 b1 d1 ... + __m128i u3 = _mm_unpackhi_epi16(b.val, d.val); // b4 d4 b5 d5 ... + + __m128i v0 = _mm_unpacklo_epi16(u0, u2); // a0 b0 c0 d0 ... + __m128i v1 = _mm_unpackhi_epi16(u0, u2); // a2 b2 c2 d2 ... + __m128i v2 = _mm_unpacklo_epi16(u1, u3); // a4 b4 c4 d4 ... + __m128i v3 = _mm_unpackhi_epi16(u1, u3); // a6 b6 c6 d6 ... + + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm_stream_si128((__m128i*)(ptr), v0); + _mm_stream_si128((__m128i*)(ptr + 8), v1); + _mm_stream_si128((__m128i*)(ptr + 16), v2); + _mm_stream_si128((__m128i*)(ptr + 24), v3); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm_store_si128((__m128i*)(ptr), v0); + _mm_store_si128((__m128i*)(ptr + 8), v1); + _mm_store_si128((__m128i*)(ptr + 16), v2); + _mm_store_si128((__m128i*)(ptr + 24), v3); + } + else + { + _mm_storeu_si128((__m128i*)(ptr), v0); + _mm_storeu_si128((__m128i*)(ptr + 8), v1); + _mm_storeu_si128((__m128i*)(ptr + 16), v2); + _mm_storeu_si128((__m128i*)(ptr + 24), v3); + } +} + +inline void v_store_interleave( unsigned* ptr, const v_uint32x4& a, const v_uint32x4& b, + hal::StoreMode mode = hal::STORE_UNALIGNED) +{ + __m128i v0 = _mm_unpacklo_epi32(a.val, b.val); + __m128i v1 = _mm_unpackhi_epi32(a.val, b.val); + + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm_stream_si128((__m128i*)(ptr), v0); + _mm_stream_si128((__m128i*)(ptr + 4), v1); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm_store_si128((__m128i*)(ptr), v0); + _mm_store_si128((__m128i*)(ptr + 4), v1); + } + else + { + _mm_storeu_si128((__m128i*)(ptr), v0); + _mm_storeu_si128((__m128i*)(ptr + 4), v1); + } +} + +inline void v_store_interleave( unsigned* ptr, const v_uint32x4& a, const v_uint32x4& b, + const v_uint32x4& c, hal::StoreMode mode = hal::STORE_UNALIGNED) +{ + v_uint32x4 z = v_setzero_u32(), u0, u1, u2, u3; + v_transpose4x4(a, b, c, z, u0, u1, u2, u3); + + __m128i v0 = _mm_or_si128(u0.val, _mm_slli_si128(u1.val, 12)); + __m128i v1 = _mm_or_si128(_mm_srli_si128(u1.val, 4), _mm_slli_si128(u2.val, 8)); + __m128i v2 = _mm_or_si128(_mm_srli_si128(u2.val, 8), _mm_slli_si128(u3.val, 4)); + + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm_stream_si128((__m128i*)(ptr), v0); + _mm_stream_si128((__m128i*)(ptr + 4), v1); + _mm_stream_si128((__m128i*)(ptr + 8), v2); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm_store_si128((__m128i*)(ptr), v0); + _mm_store_si128((__m128i*)(ptr + 4), v1); + _mm_store_si128((__m128i*)(ptr + 8), v2); + } + else + { + _mm_storeu_si128((__m128i*)(ptr), v0); + _mm_storeu_si128((__m128i*)(ptr + 4), v1); + _mm_storeu_si128((__m128i*)(ptr + 8), v2); + } +} + +inline void v_store_interleave(unsigned* ptr, const v_uint32x4& a, const v_uint32x4& b, + const v_uint32x4& c, const v_uint32x4& d, + hal::StoreMode mode = hal::STORE_UNALIGNED) +{ + v_uint32x4 v0, v1, v2, v3; + v_transpose4x4(a, b, c, d, v0, v1, v2, v3); + + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm_stream_si128((__m128i*)(ptr), v0.val); + _mm_stream_si128((__m128i*)(ptr + 4), v1.val); + _mm_stream_si128((__m128i*)(ptr + 8), v2.val); + _mm_stream_si128((__m128i*)(ptr + 12), v3.val); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm_store_si128((__m128i*)(ptr), v0.val); + _mm_store_si128((__m128i*)(ptr + 4), v1.val); + _mm_store_si128((__m128i*)(ptr + 8), v2.val); + _mm_store_si128((__m128i*)(ptr + 12), v3.val); + } + else + { + _mm_storeu_si128((__m128i*)(ptr), v0.val); + _mm_storeu_si128((__m128i*)(ptr + 4), v1.val); + _mm_storeu_si128((__m128i*)(ptr + 8), v2.val); + _mm_storeu_si128((__m128i*)(ptr + 12), v3.val); + } +} + +// 2-channel, float only +inline void v_store_interleave(float* ptr, const v_float32x4& a, const v_float32x4& b, + hal::StoreMode mode = hal::STORE_UNALIGNED) +{ + __m128 v0 = _mm_unpacklo_ps(a.val, b.val); // a0 b0 a1 b1 + __m128 v1 = _mm_unpackhi_ps(a.val, b.val); // a2 b2 a3 b3 + + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm_stream_ps(ptr, v0); + _mm_stream_ps(ptr + 4, v1); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm_store_ps(ptr, v0); + _mm_store_ps(ptr + 4, v1); + } + else + { + _mm_storeu_ps(ptr, v0); + _mm_storeu_ps(ptr + 4, v1); + } +} + +inline void v_store_interleave(float* ptr, const v_float32x4& a, const v_float32x4& b, + const v_float32x4& c, hal::StoreMode mode = hal::STORE_UNALIGNED) +{ + __m128 u0 = _mm_shuffle_ps(a.val, b.val, _MM_SHUFFLE(0, 0, 0, 0)); + __m128 u1 = _mm_shuffle_ps(c.val, a.val, _MM_SHUFFLE(1, 1, 0, 0)); + __m128 v0 = _mm_shuffle_ps(u0, u1, _MM_SHUFFLE(2, 0, 2, 0)); + __m128 u2 = _mm_shuffle_ps(b.val, c.val, _MM_SHUFFLE(1, 1, 1, 1)); + __m128 u3 = _mm_shuffle_ps(a.val, b.val, _MM_SHUFFLE(2, 2, 2, 2)); + __m128 v1 = _mm_shuffle_ps(u2, u3, _MM_SHUFFLE(2, 0, 2, 0)); + __m128 u4 = _mm_shuffle_ps(c.val, a.val, _MM_SHUFFLE(3, 3, 2, 2)); + __m128 u5 = _mm_shuffle_ps(b.val, c.val, _MM_SHUFFLE(3, 3, 3, 3)); + __m128 v2 = _mm_shuffle_ps(u4, u5, _MM_SHUFFLE(2, 0, 2, 0)); + + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm_stream_ps(ptr, v0); + _mm_stream_ps(ptr + 4, v1); + _mm_stream_ps(ptr + 8, v2); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm_store_ps(ptr, v0); + _mm_store_ps(ptr + 4, v1); + _mm_store_ps(ptr + 8, v2); + } + else + { + _mm_storeu_ps(ptr, v0); + _mm_storeu_ps(ptr + 4, v1); + _mm_storeu_ps(ptr + 8, v2); + } +} + +inline void v_store_interleave(float* ptr, const v_float32x4& a, const v_float32x4& b, + const v_float32x4& c, const v_float32x4& d, + hal::StoreMode mode = hal::STORE_UNALIGNED) +{ + __m128 u0 = _mm_unpacklo_ps(a.val, c.val); + __m128 u1 = _mm_unpacklo_ps(b.val, d.val); + __m128 u2 = _mm_unpackhi_ps(a.val, c.val); + __m128 u3 = _mm_unpackhi_ps(b.val, d.val); + __m128 v0 = _mm_unpacklo_ps(u0, u1); + __m128 v2 = _mm_unpacklo_ps(u2, u3); + __m128 v1 = _mm_unpackhi_ps(u0, u1); + __m128 v3 = _mm_unpackhi_ps(u2, u3); + + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm_stream_ps(ptr, v0); + _mm_stream_ps(ptr + 4, v1); + _mm_stream_ps(ptr + 8, v2); + _mm_stream_ps(ptr + 12, v3); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm_store_ps(ptr, v0); + _mm_store_ps(ptr + 4, v1); + _mm_store_ps(ptr + 8, v2); + _mm_store_ps(ptr + 12, v3); + } + else + { + _mm_storeu_ps(ptr, v0); + _mm_storeu_ps(ptr + 4, v1); + _mm_storeu_ps(ptr + 8, v2); + _mm_storeu_ps(ptr + 12, v3); + } +} + +inline void v_store_interleave(uint64 *ptr, const v_uint64x2& a, const v_uint64x2& b, + hal::StoreMode mode = hal::STORE_UNALIGNED) +{ + __m128i v0 = _mm_unpacklo_epi64(a.val, b.val); + __m128i v1 = _mm_unpackhi_epi64(a.val, b.val); + + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm_stream_si128((__m128i*)(ptr), v0); + _mm_stream_si128((__m128i*)(ptr + 2), v1); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm_store_si128((__m128i*)(ptr), v0); + _mm_store_si128((__m128i*)(ptr + 2), v1); + } + else + { + _mm_storeu_si128((__m128i*)(ptr), v0); + _mm_storeu_si128((__m128i*)(ptr + 2), v1); + } +} + +inline void v_store_interleave(uint64 *ptr, const v_uint64x2& a, const v_uint64x2& b, + const v_uint64x2& c, hal::StoreMode mode = hal::STORE_UNALIGNED) +{ + __m128i v0 = _mm_unpacklo_epi64(a.val, b.val); + __m128i v1 = _mm_unpacklo_epi64(c.val, _mm_unpackhi_epi64(a.val, a.val)); + __m128i v2 = _mm_unpackhi_epi64(b.val, c.val); + + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm_stream_si128((__m128i*)(ptr), v0); + _mm_stream_si128((__m128i*)(ptr + 2), v1); + _mm_stream_si128((__m128i*)(ptr + 4), v2); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm_store_si128((__m128i*)(ptr), v0); + _mm_store_si128((__m128i*)(ptr + 2), v1); + _mm_store_si128((__m128i*)(ptr + 4), v2); + } + else + { + _mm_storeu_si128((__m128i*)(ptr), v0); + _mm_storeu_si128((__m128i*)(ptr + 2), v1); + _mm_storeu_si128((__m128i*)(ptr + 4), v2); + } +} + +inline void v_store_interleave(uint64 *ptr, const v_uint64x2& a, const v_uint64x2& b, + const v_uint64x2& c, const v_uint64x2& d, + hal::StoreMode mode = hal::STORE_UNALIGNED) +{ + __m128i v0 = _mm_unpacklo_epi64(a.val, b.val); + __m128i v1 = _mm_unpacklo_epi64(c.val, d.val); + __m128i v2 = _mm_unpackhi_epi64(a.val, b.val); + __m128i v3 = _mm_unpackhi_epi64(c.val, d.val); + + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm_stream_si128((__m128i*)(ptr), v0); + _mm_stream_si128((__m128i*)(ptr + 2), v1); + _mm_stream_si128((__m128i*)(ptr + 4), v2); + _mm_stream_si128((__m128i*)(ptr + 6), v3); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm_store_si128((__m128i*)(ptr), v0); + _mm_store_si128((__m128i*)(ptr + 2), v1); + _mm_store_si128((__m128i*)(ptr + 4), v2); + _mm_store_si128((__m128i*)(ptr + 6), v3); + } + else + { + _mm_storeu_si128((__m128i*)(ptr), v0); + _mm_storeu_si128((__m128i*)(ptr + 2), v1); + _mm_storeu_si128((__m128i*)(ptr + 4), v2); + _mm_storeu_si128((__m128i*)(ptr + 6), v3); + } +} + +#define OPENCV_HAL_IMPL_SSE_LOADSTORE_INTERLEAVE(_Tpvec0, _Tp0, suffix0, _Tpvec1, _Tp1, suffix1) \ +inline void v_load_deinterleave( const _Tp0* ptr, _Tpvec0& a0, _Tpvec0& b0 ) \ +{ \ + _Tpvec1 a1, b1; \ + v_load_deinterleave((const _Tp1*)ptr, a1, b1); \ + a0 = v_reinterpret_as_##suffix0(a1); \ + b0 = v_reinterpret_as_##suffix0(b1); \ +} \ +inline void v_load_deinterleave( const _Tp0* ptr, _Tpvec0& a0, _Tpvec0& b0, _Tpvec0& c0 ) \ +{ \ + _Tpvec1 a1, b1, c1; \ + v_load_deinterleave((const _Tp1*)ptr, a1, b1, c1); \ + a0 = v_reinterpret_as_##suffix0(a1); \ + b0 = v_reinterpret_as_##suffix0(b1); \ + c0 = v_reinterpret_as_##suffix0(c1); \ +} \ +inline void v_load_deinterleave( const _Tp0* ptr, _Tpvec0& a0, _Tpvec0& b0, _Tpvec0& c0, _Tpvec0& d0 ) \ +{ \ + _Tpvec1 a1, b1, c1, d1; \ + v_load_deinterleave((const _Tp1*)ptr, a1, b1, c1, d1); \ + a0 = v_reinterpret_as_##suffix0(a1); \ + b0 = v_reinterpret_as_##suffix0(b1); \ + c0 = v_reinterpret_as_##suffix0(c1); \ + d0 = v_reinterpret_as_##suffix0(d1); \ +} \ +inline void v_store_interleave( _Tp0* ptr, const _Tpvec0& a0, const _Tpvec0& b0, \ + hal::StoreMode mode = hal::STORE_UNALIGNED ) \ +{ \ + _Tpvec1 a1 = v_reinterpret_as_##suffix1(a0); \ + _Tpvec1 b1 = v_reinterpret_as_##suffix1(b0); \ + v_store_interleave((_Tp1*)ptr, a1, b1, mode); \ +} \ +inline void v_store_interleave( _Tp0* ptr, const _Tpvec0& a0, const _Tpvec0& b0, \ + const _Tpvec0& c0, hal::StoreMode mode = hal::STORE_UNALIGNED ) \ +{ \ + _Tpvec1 a1 = v_reinterpret_as_##suffix1(a0); \ + _Tpvec1 b1 = v_reinterpret_as_##suffix1(b0); \ + _Tpvec1 c1 = v_reinterpret_as_##suffix1(c0); \ + v_store_interleave((_Tp1*)ptr, a1, b1, c1, mode); \ +} \ +inline void v_store_interleave( _Tp0* ptr, const _Tpvec0& a0, const _Tpvec0& b0, \ + const _Tpvec0& c0, const _Tpvec0& d0, \ + hal::StoreMode mode = hal::STORE_UNALIGNED ) \ +{ \ + _Tpvec1 a1 = v_reinterpret_as_##suffix1(a0); \ + _Tpvec1 b1 = v_reinterpret_as_##suffix1(b0); \ + _Tpvec1 c1 = v_reinterpret_as_##suffix1(c0); \ + _Tpvec1 d1 = v_reinterpret_as_##suffix1(d0); \ + v_store_interleave((_Tp1*)ptr, a1, b1, c1, d1, mode); \ +} + +OPENCV_HAL_IMPL_SSE_LOADSTORE_INTERLEAVE(v_int8x16, schar, s8, v_uint8x16, uchar, u8) +OPENCV_HAL_IMPL_SSE_LOADSTORE_INTERLEAVE(v_int16x8, short, s16, v_uint16x8, ushort, u16) +OPENCV_HAL_IMPL_SSE_LOADSTORE_INTERLEAVE(v_int32x4, int, s32, v_uint32x4, unsigned, u32) +OPENCV_HAL_IMPL_SSE_LOADSTORE_INTERLEAVE(v_int64x2, int64, s64, v_uint64x2, uint64, u64) +OPENCV_HAL_IMPL_SSE_LOADSTORE_INTERLEAVE(v_float64x2, double, f64, v_uint64x2, uint64, u64) + +inline v_float32x4 v_cvt_f32(const v_int32x4& a) +{ + return v_float32x4(_mm_cvtepi32_ps(a.val)); +} + +inline v_float32x4 v_cvt_f32(const v_float64x2& a) +{ + return v_float32x4(_mm_cvtpd_ps(a.val)); +} + +inline v_float32x4 v_cvt_f32(const v_float64x2& a, const v_float64x2& b) +{ + return v_float32x4(_mm_movelh_ps(_mm_cvtpd_ps(a.val), _mm_cvtpd_ps(b.val))); +} + +inline v_float64x2 v_cvt_f64(const v_int32x4& a) +{ + return v_float64x2(_mm_cvtepi32_pd(a.val)); +} + +inline v_float64x2 v_cvt_f64_high(const v_int32x4& a) +{ + return v_float64x2(_mm_cvtepi32_pd(_mm_srli_si128(a.val,8))); +} + +inline v_float64x2 v_cvt_f64(const v_float32x4& a) +{ + return v_float64x2(_mm_cvtps_pd(a.val)); +} + +inline v_float64x2 v_cvt_f64_high(const v_float32x4& a) +{ + return v_float64x2(_mm_cvtps_pd(_mm_movehl_ps(a.val, a.val))); +} + +// from (Mysticial and wim) https://stackoverflow.com/q/41144668 +inline v_float64x2 v_cvt_f64(const v_int64x2& v) +{ + // constants encoded as floating-point + __m128i magic_i_hi32 = _mm_set1_epi64x(0x4530000080000000); // 2^84 + 2^63 + __m128i magic_i_all = _mm_set1_epi64x(0x4530000080100000); // 2^84 + 2^63 + 2^52 + __m128d magic_d_all = _mm_castsi128_pd(magic_i_all); + // Blend the 32 lowest significant bits of v with magic_int_lo +#if CV_SSE4_1 + __m128i magic_i_lo = _mm_set1_epi64x(0x4330000000000000); // 2^52 + __m128i v_lo = _mm_blend_epi16(v.val, magic_i_lo, 0xcc); +#else + __m128i magic_i_lo = _mm_set1_epi32(0x43300000); // 2^52 + __m128i v_lo = _mm_unpacklo_epi32(_mm_shuffle_epi32(v.val, _MM_SHUFFLE(0, 0, 2, 0)), magic_i_lo); +#endif + // Extract the 32 most significant bits of v + __m128i v_hi = _mm_srli_epi64(v.val, 32); + // Flip the msb of v_hi and blend with 0x45300000 + v_hi = _mm_xor_si128(v_hi, magic_i_hi32); + // Compute in double precision + __m128d v_hi_dbl = _mm_sub_pd(_mm_castsi128_pd(v_hi), magic_d_all); + // (v_hi - magic_d_all) + v_lo Do not assume associativity of floating point addition + __m128d result = _mm_add_pd(v_hi_dbl, _mm_castsi128_pd(v_lo)); + return v_float64x2(result); +} + +////////////// Lookup table access //////////////////// + +inline v_int8x16 v_lut(const schar* tab, const int* idx) +{ +#if defined(_MSC_VER) + return v_int8x16(_mm_setr_epi8(tab[idx[0]], tab[idx[1]], tab[idx[ 2]], tab[idx[ 3]], tab[idx[ 4]], tab[idx[ 5]], tab[idx[ 6]], tab[idx[ 7]], + tab[idx[8]], tab[idx[9]], tab[idx[10]], tab[idx[11]], tab[idx[12]], tab[idx[13]], tab[idx[14]], tab[idx[15]])); +#else + return v_int8x16(_mm_setr_epi64( + _mm_setr_pi8(tab[idx[0]], tab[idx[1]], tab[idx[ 2]], tab[idx[ 3]], tab[idx[ 4]], tab[idx[ 5]], tab[idx[ 6]], tab[idx[ 7]]), + _mm_setr_pi8(tab[idx[8]], tab[idx[9]], tab[idx[10]], tab[idx[11]], tab[idx[12]], tab[idx[13]], tab[idx[14]], tab[idx[15]]) + )); +#endif +} +inline v_int8x16 v_lut_pairs(const schar* tab, const int* idx) +{ +#if defined(_MSC_VER) + return v_int8x16(_mm_setr_epi16(*(const short*)(tab + idx[0]), *(const short*)(tab + idx[1]), *(const short*)(tab + idx[2]), *(const short*)(tab + idx[3]), + *(const short*)(tab + idx[4]), *(const short*)(tab + idx[5]), *(const short*)(tab + idx[6]), *(const short*)(tab + idx[7]))); +#else + return v_int8x16(_mm_setr_epi64( + _mm_setr_pi16(*(const short*)(tab + idx[0]), *(const short*)(tab + idx[1]), *(const short*)(tab + idx[2]), *(const short*)(tab + idx[3])), + _mm_setr_pi16(*(const short*)(tab + idx[4]), *(const short*)(tab + idx[5]), *(const short*)(tab + idx[6]), *(const short*)(tab + idx[7])) + )); +#endif +} +inline v_int8x16 v_lut_quads(const schar* tab, const int* idx) +{ +#if defined(_MSC_VER) + return v_int8x16(_mm_setr_epi32(*(const int*)(tab + idx[0]), *(const int*)(tab + idx[1]), + *(const int*)(tab + idx[2]), *(const int*)(tab + idx[3]))); +#else + return v_int8x16(_mm_setr_epi64( + _mm_setr_pi32(*(const int*)(tab + idx[0]), *(const int*)(tab + idx[1])), + _mm_setr_pi32(*(const int*)(tab + idx[2]), *(const int*)(tab + idx[3])) + )); +#endif +} +inline v_uint8x16 v_lut(const uchar* tab, const int* idx) { return v_reinterpret_as_u8(v_lut((const schar *)tab, idx)); } +inline v_uint8x16 v_lut_pairs(const uchar* tab, const int* idx) { return v_reinterpret_as_u8(v_lut_pairs((const schar *)tab, idx)); } +inline v_uint8x16 v_lut_quads(const uchar* tab, const int* idx) { return v_reinterpret_as_u8(v_lut_quads((const schar *)tab, idx)); } + +inline v_int16x8 v_lut(const short* tab, const int* idx) +{ +#if defined(_MSC_VER) + return v_int16x8(_mm_setr_epi16(tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]], + tab[idx[4]], tab[idx[5]], tab[idx[6]], tab[idx[7]])); +#else + return v_int16x8(_mm_setr_epi64( + _mm_setr_pi16(tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]]), + _mm_setr_pi16(tab[idx[4]], tab[idx[5]], tab[idx[6]], tab[idx[7]]) + )); +#endif +} +inline v_int16x8 v_lut_pairs(const short* tab, const int* idx) +{ +#if defined(_MSC_VER) + return v_int16x8(_mm_setr_epi32(*(const int*)(tab + idx[0]), *(const int*)(tab + idx[1]), + *(const int*)(tab + idx[2]), *(const int*)(tab + idx[3]))); +#else + return v_int16x8(_mm_setr_epi64( + _mm_setr_pi32(*(const int*)(tab + idx[0]), *(const int*)(tab + idx[1])), + _mm_setr_pi32(*(const int*)(tab + idx[2]), *(const int*)(tab + idx[3])) + )); +#endif +} +inline v_int16x8 v_lut_quads(const short* tab, const int* idx) +{ + return v_int16x8(_mm_set_epi64x(*(const int64_t*)(tab + idx[1]), *(const int64_t*)(tab + idx[0]))); +} +inline v_uint16x8 v_lut(const ushort* tab, const int* idx) { return v_reinterpret_as_u16(v_lut((const short *)tab, idx)); } +inline v_uint16x8 v_lut_pairs(const ushort* tab, const int* idx) { return v_reinterpret_as_u16(v_lut_pairs((const short *)tab, idx)); } +inline v_uint16x8 v_lut_quads(const ushort* tab, const int* idx) { return v_reinterpret_as_u16(v_lut_quads((const short *)tab, idx)); } + +inline v_int32x4 v_lut(const int* tab, const int* idx) +{ +#if defined(_MSC_VER) + return v_int32x4(_mm_setr_epi32(tab[idx[0]], tab[idx[1]], + tab[idx[2]], tab[idx[3]])); +#else + return v_int32x4(_mm_setr_epi64( + _mm_setr_pi32(tab[idx[0]], tab[idx[1]]), + _mm_setr_pi32(tab[idx[2]], tab[idx[3]]) + )); +#endif +} +inline v_int32x4 v_lut_pairs(const int* tab, const int* idx) +{ + return v_int32x4(_mm_set_epi64x(*(const int64_t*)(tab + idx[1]), *(const int64_t*)(tab + idx[0]))); +} +inline v_int32x4 v_lut_quads(const int* tab, const int* idx) +{ + return v_int32x4(_mm_loadu_si128((const __m128i*)(tab + idx[0]))); +} +inline v_uint32x4 v_lut(const unsigned* tab, const int* idx) { return v_reinterpret_as_u32(v_lut((const int *)tab, idx)); } +inline v_uint32x4 v_lut_pairs(const unsigned* tab, const int* idx) { return v_reinterpret_as_u32(v_lut_pairs((const int *)tab, idx)); } +inline v_uint32x4 v_lut_quads(const unsigned* tab, const int* idx) { return v_reinterpret_as_u32(v_lut_quads((const int *)tab, idx)); } + +inline v_int64x2 v_lut(const int64_t* tab, const int* idx) +{ + return v_int64x2(_mm_set_epi64x(tab[idx[1]], tab[idx[0]])); +} +inline v_int64x2 v_lut_pairs(const int64_t* tab, const int* idx) +{ + return v_int64x2(_mm_loadu_si128((const __m128i*)(tab + idx[0]))); +} +inline v_uint64x2 v_lut(const uint64_t* tab, const int* idx) { return v_reinterpret_as_u64(v_lut((const int64_t *)tab, idx)); } +inline v_uint64x2 v_lut_pairs(const uint64_t* tab, const int* idx) { return v_reinterpret_as_u64(v_lut_pairs((const int64_t *)tab, idx)); } + +inline v_float32x4 v_lut(const float* tab, const int* idx) +{ + return v_float32x4(_mm_setr_ps(tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]])); +} +inline v_float32x4 v_lut_pairs(const float* tab, const int* idx) { return v_reinterpret_as_f32(v_lut_pairs((const int *)tab, idx)); } +inline v_float32x4 v_lut_quads(const float* tab, const int* idx) { return v_reinterpret_as_f32(v_lut_quads((const int *)tab, idx)); } + +inline v_float64x2 v_lut(const double* tab, const int* idx) +{ + return v_float64x2(_mm_setr_pd(tab[idx[0]], tab[idx[1]])); +} +inline v_float64x2 v_lut_pairs(const double* tab, const int* idx) { return v_float64x2(_mm_castsi128_pd(_mm_loadu_si128((const __m128i*)(tab + idx[0])))); } + +inline v_int32x4 v_lut(const int* tab, const v_int32x4& idxvec) +{ + int CV_DECL_ALIGNED(32) idx[4]; + v_store_aligned(idx, idxvec); + return v_int32x4(_mm_setr_epi32(tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]])); +} + +inline v_uint32x4 v_lut(const unsigned* tab, const v_int32x4& idxvec) +{ + return v_reinterpret_as_u32(v_lut((const int *)tab, idxvec)); +} + +inline v_float32x4 v_lut(const float* tab, const v_int32x4& idxvec) +{ + int CV_DECL_ALIGNED(32) idx[4]; + v_store_aligned(idx, idxvec); + return v_float32x4(_mm_setr_ps(tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]])); +} + +inline v_float64x2 v_lut(const double* tab, const v_int32x4& idxvec) +{ + int idx[2]; + v_store_low(idx, idxvec); + return v_float64x2(_mm_setr_pd(tab[idx[0]], tab[idx[1]])); +} + +// loads pairs from the table and deinterleaves them, e.g. returns: +// x = (tab[idxvec[0], tab[idxvec[1]], tab[idxvec[2]], tab[idxvec[3]]), +// y = (tab[idxvec[0]+1], tab[idxvec[1]+1], tab[idxvec[2]+1], tab[idxvec[3]+1]) +// note that the indices are float's indices, not the float-pair indices. +// in theory, this function can be used to implement bilinear interpolation, +// when idxvec are the offsets within the image. +inline void v_lut_deinterleave(const float* tab, const v_int32x4& idxvec, v_float32x4& x, v_float32x4& y) +{ + int CV_DECL_ALIGNED(32) idx[4]; + v_store_aligned(idx, idxvec); + __m128 z = _mm_setzero_ps(); + __m128 xy01 = _mm_loadl_pi(z, (__m64*)(tab + idx[0])); + __m128 xy23 = _mm_loadl_pi(z, (__m64*)(tab + idx[2])); + xy01 = _mm_loadh_pi(xy01, (__m64*)(tab + idx[1])); + xy23 = _mm_loadh_pi(xy23, (__m64*)(tab + idx[3])); + __m128 xxyy02 = _mm_unpacklo_ps(xy01, xy23); + __m128 xxyy13 = _mm_unpackhi_ps(xy01, xy23); + x = v_float32x4(_mm_unpacklo_ps(xxyy02, xxyy13)); + y = v_float32x4(_mm_unpackhi_ps(xxyy02, xxyy13)); +} + +inline void v_lut_deinterleave(const double* tab, const v_int32x4& idxvec, v_float64x2& x, v_float64x2& y) +{ + int idx[2]; + v_store_low(idx, idxvec); + __m128d xy0 = _mm_loadu_pd(tab + idx[0]); + __m128d xy1 = _mm_loadu_pd(tab + idx[1]); + x = v_float64x2(_mm_unpacklo_pd(xy0, xy1)); + y = v_float64x2(_mm_unpackhi_pd(xy0, xy1)); +} + +inline v_int8x16 v_interleave_pairs(const v_int8x16& vec) +{ +#if CV_SSSE3 + return v_int8x16(_mm_shuffle_epi8(vec.val, _mm_set_epi64x(0x0f0d0e0c0b090a08, 0x0705060403010200))); +#else + __m128i a = _mm_shufflelo_epi16(vec.val, _MM_SHUFFLE(3, 1, 2, 0)); + a = _mm_shufflehi_epi16(a, _MM_SHUFFLE(3, 1, 2, 0)); + a = _mm_shuffle_epi32(a, _MM_SHUFFLE(3, 1, 2, 0)); + return v_int8x16(_mm_unpacklo_epi8(a, _mm_unpackhi_epi64(a, a))); +#endif +} +inline v_uint8x16 v_interleave_pairs(const v_uint8x16& vec) { return v_reinterpret_as_u8(v_interleave_pairs(v_reinterpret_as_s8(vec))); } +inline v_int8x16 v_interleave_quads(const v_int8x16& vec) +{ +#if CV_SSSE3 + return v_int8x16(_mm_shuffle_epi8(vec.val, _mm_set_epi64x(0x0f0b0e0a0d090c08, 0x0703060205010400))); +#else + __m128i a = _mm_shuffle_epi32(vec.val, _MM_SHUFFLE(3, 1, 2, 0)); + return v_int8x16(_mm_unpacklo_epi8(a, _mm_unpackhi_epi64(a, a))); +#endif +} +inline v_uint8x16 v_interleave_quads(const v_uint8x16& vec) { return v_reinterpret_as_u8(v_interleave_quads(v_reinterpret_as_s8(vec))); } + +inline v_int16x8 v_interleave_pairs(const v_int16x8& vec) +{ +#if CV_SSSE3 + return v_int16x8(_mm_shuffle_epi8(vec.val, _mm_set_epi64x(0x0f0e0b0a0d0c0908, 0x0706030205040100))); +#else + __m128i a = _mm_shufflelo_epi16(vec.val, _MM_SHUFFLE(3, 1, 2, 0)); + return v_int16x8(_mm_shufflehi_epi16(a, _MM_SHUFFLE(3, 1, 2, 0))); +#endif +} +inline v_uint16x8 v_interleave_pairs(const v_uint16x8& vec) { return v_reinterpret_as_u16(v_interleave_pairs(v_reinterpret_as_s16(vec))); } +inline v_int16x8 v_interleave_quads(const v_int16x8& vec) +{ +#if CV_SSSE3 + return v_int16x8(_mm_shuffle_epi8(vec.val, _mm_set_epi64x(0x0f0e07060d0c0504, 0x0b0a030209080100))); +#else + return v_int16x8(_mm_unpacklo_epi16(vec.val, _mm_unpackhi_epi64(vec.val, vec.val))); +#endif +} +inline v_uint16x8 v_interleave_quads(const v_uint16x8& vec) { return v_reinterpret_as_u16(v_interleave_quads(v_reinterpret_as_s16(vec))); } + +inline v_int32x4 v_interleave_pairs(const v_int32x4& vec) +{ + return v_int32x4(_mm_shuffle_epi32(vec.val, _MM_SHUFFLE(3, 1, 2, 0))); +} +inline v_uint32x4 v_interleave_pairs(const v_uint32x4& vec) { return v_reinterpret_as_u32(v_interleave_pairs(v_reinterpret_as_s32(vec))); } +inline v_float32x4 v_interleave_pairs(const v_float32x4& vec) { return v_reinterpret_as_f32(v_interleave_pairs(v_reinterpret_as_s32(vec))); } + +inline v_int8x16 v_pack_triplets(const v_int8x16& vec) +{ +#if CV_SSSE3 + return v_int8x16(_mm_shuffle_epi8(vec.val, _mm_set_epi64x(0xffffff0f0e0d0c0a, 0x0908060504020100))); +#else + __m128i mask = _mm_set1_epi64x(0x00000000FFFFFFFF); + __m128i a = _mm_srli_si128(_mm_or_si128(_mm_andnot_si128(mask, vec.val), _mm_and_si128(mask, _mm_sll_epi32(vec.val, _mm_set_epi64x(0, 8)))), 1); + return v_int8x16(_mm_srli_si128(_mm_shufflelo_epi16(a, _MM_SHUFFLE(2, 1, 0, 3)), 2)); +#endif +} +inline v_uint8x16 v_pack_triplets(const v_uint8x16& vec) { return v_reinterpret_as_u8(v_pack_triplets(v_reinterpret_as_s8(vec))); } + +inline v_int16x8 v_pack_triplets(const v_int16x8& vec) +{ +#if CV_SSSE3 + return v_int16x8(_mm_shuffle_epi8(vec.val, _mm_set_epi64x(0xffff0f0e0d0c0b0a, 0x0908050403020100))); +#else + return v_int16x8(_mm_srli_si128(_mm_shufflelo_epi16(vec.val, _MM_SHUFFLE(2, 1, 0, 3)), 2)); +#endif +} +inline v_uint16x8 v_pack_triplets(const v_uint16x8& vec) { return v_reinterpret_as_u16(v_pack_triplets(v_reinterpret_as_s16(vec))); } + +inline v_int32x4 v_pack_triplets(const v_int32x4& vec) { return vec; } +inline v_uint32x4 v_pack_triplets(const v_uint32x4& vec) { return vec; } +inline v_float32x4 v_pack_triplets(const v_float32x4& vec) { return vec; } + +template +inline uchar v_extract_n(const v_uint8x16& v) +{ +#if CV_SSE4_1 + return (uchar)_mm_extract_epi8(v.val, i); +#else + return v_rotate_right(v).get0(); +#endif +} + +template +inline schar v_extract_n(const v_int8x16& v) +{ + return (schar)v_extract_n(v_reinterpret_as_u8(v)); +} + +template +inline ushort v_extract_n(const v_uint16x8& v) +{ + return (ushort)_mm_extract_epi16(v.val, i); +} + +template +inline short v_extract_n(const v_int16x8& v) +{ + return (short)v_extract_n(v_reinterpret_as_u16(v)); +} + +template +inline uint v_extract_n(const v_uint32x4& v) +{ +#if CV_SSE4_1 + return (uint)_mm_extract_epi32(v.val, i); +#else + return v_rotate_right(v).get0(); +#endif +} + +template +inline int v_extract_n(const v_int32x4& v) +{ + return (int)v_extract_n(v_reinterpret_as_u32(v)); +} + +template +inline uint64 v_extract_n(const v_uint64x2& v) +{ +#ifdef CV__SIMD_NATIVE_mm_extract_epi64 + return (uint64)_v128_extract_epi64(v.val); +#else + return v_rotate_right(v).get0(); +#endif +} + +template +inline int64 v_extract_n(const v_int64x2& v) +{ + return (int64)v_extract_n(v_reinterpret_as_u64(v)); +} + +template +inline float v_extract_n(const v_float32x4& v) +{ + union { uint iv; float fv; } d; + d.iv = v_extract_n(v_reinterpret_as_u32(v)); + return d.fv; +} + +template +inline double v_extract_n(const v_float64x2& v) +{ + union { uint64 iv; double dv; } d; + d.iv = v_extract_n(v_reinterpret_as_u64(v)); + return d.dv; +} + +template +inline v_int32x4 v_broadcast_element(const v_int32x4& v) +{ + return v_int32x4(_mm_shuffle_epi32(v.val, _MM_SHUFFLE(i,i,i,i))); +} + +template +inline v_uint32x4 v_broadcast_element(const v_uint32x4& v) +{ + return v_uint32x4(_mm_shuffle_epi32(v.val, _MM_SHUFFLE(i,i,i,i))); +} + +template +inline v_float32x4 v_broadcast_element(const v_float32x4& v) +{ + return v_float32x4(_mm_shuffle_ps(v.val, v.val, _MM_SHUFFLE((char)i,(char)i,(char)i,(char)i))); +} + +////////////// FP16 support /////////////////////////// + +inline v_float32x4 v_load_expand(const float16_t* ptr) +{ +#if CV_FP16 + return v_float32x4(_mm_cvtph_ps(_mm_loadu_si128((const __m128i*)ptr))); +#else + const __m128i z = _mm_setzero_si128(), delta = _mm_set1_epi32(0x38000000); + const __m128i signmask = _mm_set1_epi32(0x80000000), maxexp = _mm_set1_epi32(0x7c000000); + const __m128 deltaf = _mm_castsi128_ps(_mm_set1_epi32(0x38800000)); + __m128i bits = _mm_unpacklo_epi16(z, _mm_loadl_epi64((const __m128i*)ptr)); // h << 16 + __m128i e = _mm_and_si128(bits, maxexp), sign = _mm_and_si128(bits, signmask); + __m128i t = _mm_add_epi32(_mm_srli_epi32(_mm_xor_si128(bits, sign), 3), delta); // ((h & 0x7fff) << 13) + delta + __m128i zt = _mm_castps_si128(_mm_sub_ps(_mm_castsi128_ps(_mm_add_epi32(t, _mm_set1_epi32(1 << 23))), deltaf)); + + t = _mm_add_epi32(t, _mm_and_si128(delta, _mm_cmpeq_epi32(maxexp, e))); + __m128i zmask = _mm_cmpeq_epi32(e, z); + __m128i ft = v_select_si128(zmask, zt, t); + return v_float32x4(_mm_castsi128_ps(_mm_or_si128(ft, sign))); +#endif +} + +inline void v_pack_store(float16_t* ptr, const v_float32x4& v) +{ +#if CV_FP16 + __m128i fp16_value = _mm_cvtps_ph(v.val, 0); + _mm_storel_epi64((__m128i*)ptr, fp16_value); +#else + const __m128i signmask = _mm_set1_epi32(0x80000000); + const __m128i rval = _mm_set1_epi32(0x3f000000); + + __m128i t = _mm_castps_si128(v.val); + __m128i sign = _mm_srai_epi32(_mm_and_si128(t, signmask), 16); + t = _mm_andnot_si128(signmask, t); + + __m128i finitemask = _mm_cmpgt_epi32(_mm_set1_epi32(0x47800000), t); + __m128i isnan = _mm_cmpgt_epi32(t, _mm_set1_epi32(0x7f800000)); + __m128i naninf = v_select_si128(isnan, _mm_set1_epi32(0x7e00), _mm_set1_epi32(0x7c00)); + __m128i tinymask = _mm_cmpgt_epi32(_mm_set1_epi32(0x38800000), t); + __m128i tt = _mm_castps_si128(_mm_add_ps(_mm_castsi128_ps(t), _mm_castsi128_ps(rval))); + tt = _mm_sub_epi32(tt, rval); + __m128i odd = _mm_and_si128(_mm_srli_epi32(t, 13), _mm_set1_epi32(1)); + __m128i nt = _mm_add_epi32(t, _mm_set1_epi32(0xc8000fff)); + nt = _mm_srli_epi32(_mm_add_epi32(nt, odd), 13); + t = v_select_si128(tinymask, tt, nt); + t = v_select_si128(finitemask, t, naninf); + t = _mm_or_si128(t, sign); + t = _mm_packs_epi32(t, t); + _mm_storel_epi64((__m128i*)ptr, t); +#endif +} + +inline void v_cleanup() {} + +CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END + +//! @endcond + +} + +#endif diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/hal/intrin_sse_em.hpp b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/hal/intrin_sse_em.hpp new file mode 100644 index 0000000..6fb0881 --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/hal/intrin_sse_em.hpp @@ -0,0 +1,180 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html + +#ifndef OPENCV_HAL_INTRIN_SSE_EM_HPP +#define OPENCV_HAL_INTRIN_SSE_EM_HPP + +namespace cv +{ + +//! @cond IGNORED + +CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN + +#define OPENCV_HAL_SSE_WRAP_1(fun, tp) \ + inline tp _v128_##fun(const tp& a) \ + { return _mm_##fun(a); } + +#define OPENCV_HAL_SSE_WRAP_2(fun, tp) \ + inline tp _v128_##fun(const tp& a, const tp& b) \ + { return _mm_##fun(a, b); } + +#define OPENCV_HAL_SSE_WRAP_3(fun, tp) \ + inline tp _v128_##fun(const tp& a, const tp& b, const tp& c) \ + { return _mm_##fun(a, b, c); } + +///////////////////////////// XOP ///////////////////////////// + +// [todo] define CV_XOP +#if 1 // CV_XOP +inline __m128i _v128_comgt_epu32(const __m128i& a, const __m128i& b) +{ + const __m128i delta = _mm_set1_epi32((int)0x80000000); + return _mm_cmpgt_epi32(_mm_xor_si128(a, delta), _mm_xor_si128(b, delta)); +} +// wrapping XOP +#else +OPENCV_HAL_SSE_WRAP_2(_v128_comgt_epu32, __m128i) +#endif // !CV_XOP + +///////////////////////////// SSE4.1 ///////////////////////////// + +#if !CV_SSE4_1 + +/** Swizzle **/ +inline __m128i _v128_blendv_epi8(const __m128i& a, const __m128i& b, const __m128i& mask) +{ return _mm_xor_si128(a, _mm_and_si128(_mm_xor_si128(b, a), mask)); } + +/** Convert **/ +// 8 >> 16 +inline __m128i _v128_cvtepu8_epi16(const __m128i& a) +{ + const __m128i z = _mm_setzero_si128(); + return _mm_unpacklo_epi8(a, z); +} +inline __m128i _v128_cvtepi8_epi16(const __m128i& a) +{ return _mm_srai_epi16(_mm_unpacklo_epi8(a, a), 8); } +// 8 >> 32 +inline __m128i _v128_cvtepu8_epi32(const __m128i& a) +{ + const __m128i z = _mm_setzero_si128(); + return _mm_unpacklo_epi16(_mm_unpacklo_epi8(a, z), z); +} +inline __m128i _v128_cvtepi8_epi32(const __m128i& a) +{ + __m128i r = _mm_unpacklo_epi8(a, a); + r = _mm_unpacklo_epi8(r, r); + return _mm_srai_epi32(r, 24); +} +// 16 >> 32 +inline __m128i _v128_cvtepu16_epi32(const __m128i& a) +{ + const __m128i z = _mm_setzero_si128(); + return _mm_unpacklo_epi16(a, z); +} +inline __m128i _v128_cvtepi16_epi32(const __m128i& a) +{ return _mm_srai_epi32(_mm_unpacklo_epi16(a, a), 16); } +// 32 >> 64 +inline __m128i _v128_cvtepu32_epi64(const __m128i& a) +{ + const __m128i z = _mm_setzero_si128(); + return _mm_unpacklo_epi32(a, z); +} +inline __m128i _v128_cvtepi32_epi64(const __m128i& a) +{ return _mm_unpacklo_epi32(a, _mm_srai_epi32(a, 31)); } + +/** Arithmetic **/ +inline __m128i _v128_mullo_epi32(const __m128i& a, const __m128i& b) +{ + __m128i c0 = _mm_mul_epu32(a, b); + __m128i c1 = _mm_mul_epu32(_mm_srli_epi64(a, 32), _mm_srli_epi64(b, 32)); + __m128i d0 = _mm_unpacklo_epi32(c0, c1); + __m128i d1 = _mm_unpackhi_epi32(c0, c1); + return _mm_unpacklo_epi64(d0, d1); +} + +/** Math **/ +inline __m128i _v128_min_epu32(const __m128i& a, const __m128i& b) +{ return _v128_blendv_epi8(a, b, _v128_comgt_epu32(a, b)); } + +// wrapping SSE4.1 +#else +OPENCV_HAL_SSE_WRAP_1(cvtepu8_epi16, __m128i) +OPENCV_HAL_SSE_WRAP_1(cvtepi8_epi16, __m128i) +OPENCV_HAL_SSE_WRAP_1(cvtepu8_epi32, __m128i) +OPENCV_HAL_SSE_WRAP_1(cvtepi8_epi32, __m128i) +OPENCV_HAL_SSE_WRAP_1(cvtepu16_epi32, __m128i) +OPENCV_HAL_SSE_WRAP_1(cvtepi16_epi32, __m128i) +OPENCV_HAL_SSE_WRAP_1(cvtepu32_epi64, __m128i) +OPENCV_HAL_SSE_WRAP_1(cvtepi32_epi64, __m128i) +OPENCV_HAL_SSE_WRAP_2(min_epu32, __m128i) +OPENCV_HAL_SSE_WRAP_2(mullo_epi32, __m128i) +OPENCV_HAL_SSE_WRAP_3(blendv_epi8, __m128i) +#endif // !CV_SSE4_1 + +///////////////////////////// Revolutionary ///////////////////////////// + +/** Convert **/ +// 16 << 8 +inline __m128i _v128_cvtepu8_epi16_high(const __m128i& a) +{ + const __m128i z = _mm_setzero_si128(); + return _mm_unpackhi_epi8(a, z); +} +inline __m128i _v128_cvtepi8_epi16_high(const __m128i& a) +{ return _mm_srai_epi16(_mm_unpackhi_epi8(a, a), 8); } +// 32 << 16 +inline __m128i _v128_cvtepu16_epi32_high(const __m128i& a) +{ + const __m128i z = _mm_setzero_si128(); + return _mm_unpackhi_epi16(a, z); +} +inline __m128i _v128_cvtepi16_epi32_high(const __m128i& a) +{ return _mm_srai_epi32(_mm_unpackhi_epi16(a, a), 16); } +// 64 << 32 +inline __m128i _v128_cvtepu32_epi64_high(const __m128i& a) +{ + const __m128i z = _mm_setzero_si128(); + return _mm_unpackhi_epi32(a, z); +} +inline __m128i _v128_cvtepi32_epi64_high(const __m128i& a) +{ return _mm_unpackhi_epi32(a, _mm_srai_epi32(a, 31)); } + +/** Miscellaneous **/ +inline __m128i _v128_packs_epu32(const __m128i& a, const __m128i& b) +{ + const __m128i m = _mm_set1_epi32(65535); + __m128i am = _v128_min_epu32(a, m); + __m128i bm = _v128_min_epu32(b, m); +#if CV_SSE4_1 + return _mm_packus_epi32(am, bm); +#else + const __m128i d = _mm_set1_epi32(32768), nd = _mm_set1_epi16(-32768); + am = _mm_sub_epi32(am, d); + bm = _mm_sub_epi32(bm, d); + am = _mm_packs_epi32(am, bm); + return _mm_sub_epi16(am, nd); +#endif +} + +template +inline int64 _v128_extract_epi64(const __m128i& a) +{ +#if defined(CV__SIMD_HAVE_mm_extract_epi64) || (CV_SSE4_1 && (defined(__x86_64__)/*GCC*/ || defined(_M_X64)/*MSVC*/)) +#define CV__SIMD_NATIVE_mm_extract_epi64 1 + return _mm_extract_epi64(a, i); +#else + CV_DECL_ALIGNED(16) int64 tmp[2]; + _mm_store_si128((__m128i*)tmp, a); + return tmp[i]; +#endif +} + +CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END + +//! @endcond + +} // cv:: + +#endif // OPENCV_HAL_INTRIN_SSE_EM_HPP diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/hal/intrin_vsx.hpp b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/hal/intrin_vsx.hpp new file mode 100644 index 0000000..b198643 --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/hal/intrin_vsx.hpp @@ -0,0 +1,1608 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html + +#ifndef OPENCV_HAL_VSX_HPP +#define OPENCV_HAL_VSX_HPP + +#include +#include "opencv2/core/utility.hpp" + +#define CV_SIMD128 1 +#define CV_SIMD128_64F 1 + +namespace cv +{ + +//! @cond IGNORED + +CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN + +///////// Types //////////// + +struct v_uint8x16 +{ + typedef uchar lane_type; + enum { nlanes = 16 }; + vec_uchar16 val; + + explicit v_uint8x16(const vec_uchar16& v) : val(v) + {} + v_uint8x16() + {} + v_uint8x16(vec_bchar16 v) : val(vec_uchar16_c(v)) + {} + v_uint8x16(uchar v0, uchar v1, uchar v2, uchar v3, uchar v4, uchar v5, uchar v6, uchar v7, + uchar v8, uchar v9, uchar v10, uchar v11, uchar v12, uchar v13, uchar v14, uchar v15) + : val(vec_uchar16_set(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15)) + {} + + static inline v_uint8x16 zero() { return v_uint8x16(vec_uchar16_z); } + + uchar get0() const + { return vec_extract(val, 0); } +}; + +struct v_int8x16 +{ + typedef schar lane_type; + enum { nlanes = 16 }; + vec_char16 val; + + explicit v_int8x16(const vec_char16& v) : val(v) + {} + v_int8x16() + {} + v_int8x16(vec_bchar16 v) : val(vec_char16_c(v)) + {} + v_int8x16(schar v0, schar v1, schar v2, schar v3, schar v4, schar v5, schar v6, schar v7, + schar v8, schar v9, schar v10, schar v11, schar v12, schar v13, schar v14, schar v15) + : val(vec_char16_set(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15)) + {} + + static inline v_int8x16 zero() { return v_int8x16(vec_char16_z); } + + schar get0() const + { return vec_extract(val, 0); } +}; + +struct v_uint16x8 +{ + typedef ushort lane_type; + enum { nlanes = 8 }; + vec_ushort8 val; + + explicit v_uint16x8(const vec_ushort8& v) : val(v) + {} + v_uint16x8() + {} + v_uint16x8(vec_bshort8 v) : val(vec_ushort8_c(v)) + {} + v_uint16x8(ushort v0, ushort v1, ushort v2, ushort v3, ushort v4, ushort v5, ushort v6, ushort v7) + : val(vec_ushort8_set(v0, v1, v2, v3, v4, v5, v6, v7)) + {} + + static inline v_uint16x8 zero() { return v_uint16x8(vec_ushort8_z); } + + ushort get0() const + { return vec_extract(val, 0); } +}; + +struct v_int16x8 +{ + typedef short lane_type; + enum { nlanes = 8 }; + vec_short8 val; + + explicit v_int16x8(const vec_short8& v) : val(v) + {} + v_int16x8() + {} + v_int16x8(vec_bshort8 v) : val(vec_short8_c(v)) + {} + v_int16x8(short v0, short v1, short v2, short v3, short v4, short v5, short v6, short v7) + : val(vec_short8_set(v0, v1, v2, v3, v4, v5, v6, v7)) + {} + + static inline v_int16x8 zero() { return v_int16x8(vec_short8_z); } + + short get0() const + { return vec_extract(val, 0); } +}; + +struct v_uint32x4 +{ + typedef unsigned lane_type; + enum { nlanes = 4 }; + vec_uint4 val; + + explicit v_uint32x4(const vec_uint4& v) : val(v) + {} + v_uint32x4() + {} + v_uint32x4(vec_bint4 v) : val(vec_uint4_c(v)) + {} + v_uint32x4(unsigned v0, unsigned v1, unsigned v2, unsigned v3) : val(vec_uint4_set(v0, v1, v2, v3)) + {} + + static inline v_uint32x4 zero() { return v_uint32x4(vec_uint4_z); } + + uint get0() const + { return vec_extract(val, 0); } +}; + +struct v_int32x4 +{ + typedef int lane_type; + enum { nlanes = 4 }; + vec_int4 val; + + explicit v_int32x4(const vec_int4& v) : val(v) + {} + v_int32x4() + {} + v_int32x4(vec_bint4 v) : val(vec_int4_c(v)) + {} + v_int32x4(int v0, int v1, int v2, int v3) : val(vec_int4_set(v0, v1, v2, v3)) + {} + + static inline v_int32x4 zero() { return v_int32x4(vec_int4_z); } + + int get0() const + { return vec_extract(val, 0); } +}; + +struct v_float32x4 +{ + typedef float lane_type; + enum { nlanes = 4 }; + vec_float4 val; + + explicit v_float32x4(const vec_float4& v) : val(v) + {} + v_float32x4() + {} + v_float32x4(vec_bint4 v) : val(vec_float4_c(v)) + {} + v_float32x4(float v0, float v1, float v2, float v3) : val(vec_float4_set(v0, v1, v2, v3)) + {} + + static inline v_float32x4 zero() { return v_float32x4(vec_float4_z); } + + float get0() const + { return vec_extract(val, 0); } +}; + +struct v_uint64x2 +{ + typedef uint64 lane_type; + enum { nlanes = 2 }; + vec_udword2 val; + + explicit v_uint64x2(const vec_udword2& v) : val(v) + {} + v_uint64x2() + {} + v_uint64x2(vec_bdword2 v) : val(vec_udword2_c(v)) + {} + v_uint64x2(uint64 v0, uint64 v1) : val(vec_udword2_set(v0, v1)) + {} + + static inline v_uint64x2 zero() { return v_uint64x2(vec_udword2_z); } + + uint64 get0() const + { return vec_extract(val, 0); } +}; + +struct v_int64x2 +{ + typedef int64 lane_type; + enum { nlanes = 2 }; + vec_dword2 val; + + explicit v_int64x2(const vec_dword2& v) : val(v) + {} + v_int64x2() + {} + v_int64x2(vec_bdword2 v) : val(vec_dword2_c(v)) + {} + v_int64x2(int64 v0, int64 v1) : val(vec_dword2_set(v0, v1)) + {} + + static inline v_int64x2 zero() { return v_int64x2(vec_dword2_z); } + + int64 get0() const + { return vec_extract(val, 0); } +}; + +struct v_float64x2 +{ + typedef double lane_type; + enum { nlanes = 2 }; + vec_double2 val; + + explicit v_float64x2(const vec_double2& v) : val(v) + {} + v_float64x2() + {} + v_float64x2(vec_bdword2 v) : val(vec_double2_c(v)) + {} + v_float64x2(double v0, double v1) : val(vec_double2_set(v0, v1)) + {} + + static inline v_float64x2 zero() { return v_float64x2(vec_double2_z); } + + double get0() const + { return vec_extract(val, 0); } +}; + +#define OPENCV_HAL_IMPL_VSX_EXTRACT_N(_Tpvec, _Tp) \ +template inline _Tp v_extract_n(VSX_UNUSED(_Tpvec v)) { return vec_extract(v.val, i); } + +OPENCV_HAL_IMPL_VSX_EXTRACT_N(v_uint8x16, uchar) +OPENCV_HAL_IMPL_VSX_EXTRACT_N(v_int8x16, schar) +OPENCV_HAL_IMPL_VSX_EXTRACT_N(v_uint16x8, ushort) +OPENCV_HAL_IMPL_VSX_EXTRACT_N(v_int16x8, short) +OPENCV_HAL_IMPL_VSX_EXTRACT_N(v_uint32x4, uint) +OPENCV_HAL_IMPL_VSX_EXTRACT_N(v_int32x4, int) +OPENCV_HAL_IMPL_VSX_EXTRACT_N(v_uint64x2, uint64) +OPENCV_HAL_IMPL_VSX_EXTRACT_N(v_int64x2, int64) +OPENCV_HAL_IMPL_VSX_EXTRACT_N(v_float32x4, float) +OPENCV_HAL_IMPL_VSX_EXTRACT_N(v_float64x2, double) + +//////////////// Load and store operations /////////////// + +/* + * clang-5 aborted during parse "vec_xxx_c" only if it's + * inside a function template which is defined by preprocessor macro. + * + * if vec_xxx_c defined as C++ cast, clang-5 will pass it +*/ +#define OPENCV_HAL_IMPL_VSX_INITVEC(_Tpvec, _Tp, suffix, cast) \ +inline _Tpvec v_setzero_##suffix() { return _Tpvec(vec_splats((_Tp)0)); } \ +inline _Tpvec v_setall_##suffix(_Tp v) { return _Tpvec(vec_splats((_Tp)v));} \ +template inline _Tpvec v_reinterpret_as_##suffix(const _Tpvec0 &a) \ +{ return _Tpvec((cast)a.val); } + +OPENCV_HAL_IMPL_VSX_INITVEC(v_uint8x16, uchar, u8, vec_uchar16) +OPENCV_HAL_IMPL_VSX_INITVEC(v_int8x16, schar, s8, vec_char16) +OPENCV_HAL_IMPL_VSX_INITVEC(v_uint16x8, ushort, u16, vec_ushort8) +OPENCV_HAL_IMPL_VSX_INITVEC(v_int16x8, short, s16, vec_short8) +OPENCV_HAL_IMPL_VSX_INITVEC(v_uint32x4, uint, u32, vec_uint4) +OPENCV_HAL_IMPL_VSX_INITVEC(v_int32x4, int, s32, vec_int4) +OPENCV_HAL_IMPL_VSX_INITVEC(v_uint64x2, uint64, u64, vec_udword2) +OPENCV_HAL_IMPL_VSX_INITVEC(v_int64x2, int64, s64, vec_dword2) +OPENCV_HAL_IMPL_VSX_INITVEC(v_float32x4, float, f32, vec_float4) +OPENCV_HAL_IMPL_VSX_INITVEC(v_float64x2, double, f64, vec_double2) + +#define OPENCV_HAL_IMPL_VSX_LOADSTORE_C(_Tpvec, _Tp, ld, ld_a, st, st_a) \ +inline _Tpvec v_load(const _Tp* ptr) \ +{ return _Tpvec(ld(0, ptr)); } \ +inline _Tpvec v_load_aligned(VSX_UNUSED(const _Tp* ptr)) \ +{ return _Tpvec(ld_a(0, ptr)); } \ +inline _Tpvec v_load_low(const _Tp* ptr) \ +{ return _Tpvec(vec_ld_l8(ptr)); } \ +inline _Tpvec v_load_halves(const _Tp* ptr0, const _Tp* ptr1) \ +{ return _Tpvec(vec_mergesqh(vec_ld_l8(ptr0), vec_ld_l8(ptr1))); } \ +inline void v_store(_Tp* ptr, const _Tpvec& a) \ +{ st(a.val, 0, ptr); } \ +inline void v_store_aligned(VSX_UNUSED(_Tp* ptr), const _Tpvec& a) \ +{ st_a(a.val, 0, ptr); } \ +inline void v_store_aligned_nocache(VSX_UNUSED(_Tp* ptr), const _Tpvec& a) \ +{ st_a(a.val, 0, ptr); } \ +inline void v_store(_Tp* ptr, const _Tpvec& a, hal::StoreMode mode) \ +{ if(mode == hal::STORE_UNALIGNED) st(a.val, 0, ptr); else st_a(a.val, 0, ptr); } \ +inline void v_store_low(_Tp* ptr, const _Tpvec& a) \ +{ vec_st_l8(a.val, ptr); } \ +inline void v_store_high(_Tp* ptr, const _Tpvec& a) \ +{ vec_st_h8(a.val, ptr); } + +// working around gcc bug for aligned ld/st +// if runtime check for vec_ld/st fail we failback to unaligned ld/st +// https://github.com/opencv/opencv/issues/13211 +#ifdef CV_COMPILER_VSX_BROKEN_ALIGNED + #define OPENCV_HAL_IMPL_VSX_LOADSTORE(_Tpvec, _Tp) \ + OPENCV_HAL_IMPL_VSX_LOADSTORE_C(_Tpvec, _Tp, vsx_ld, vsx_ld, vsx_st, vsx_st) +#else + #define OPENCV_HAL_IMPL_VSX_LOADSTORE(_Tpvec, _Tp) \ + OPENCV_HAL_IMPL_VSX_LOADSTORE_C(_Tpvec, _Tp, vsx_ld, vec_ld, vsx_st, vec_st) +#endif + +OPENCV_HAL_IMPL_VSX_LOADSTORE(v_uint8x16, uchar) +OPENCV_HAL_IMPL_VSX_LOADSTORE(v_int8x16, schar) +OPENCV_HAL_IMPL_VSX_LOADSTORE(v_uint16x8, ushort) +OPENCV_HAL_IMPL_VSX_LOADSTORE(v_int16x8, short) +OPENCV_HAL_IMPL_VSX_LOADSTORE(v_uint32x4, uint) +OPENCV_HAL_IMPL_VSX_LOADSTORE(v_int32x4, int) +OPENCV_HAL_IMPL_VSX_LOADSTORE(v_float32x4, float) + +OPENCV_HAL_IMPL_VSX_LOADSTORE_C(v_float64x2, double, vsx_ld, vsx_ld, vsx_st, vsx_st) +OPENCV_HAL_IMPL_VSX_LOADSTORE_C(v_uint64x2, uint64, vsx_ld2, vsx_ld2, vsx_st2, vsx_st2) +OPENCV_HAL_IMPL_VSX_LOADSTORE_C(v_int64x2, int64, vsx_ld2, vsx_ld2, vsx_st2, vsx_st2) + +//////////////// Value reordering /////////////// + +/* de&interleave */ +#define OPENCV_HAL_IMPL_VSX_INTERLEAVE(_Tp, _Tpvec) \ +inline void v_load_deinterleave(const _Tp* ptr, _Tpvec& a, _Tpvec& b) \ +{ vec_ld_deinterleave(ptr, a.val, b.val);} \ +inline void v_load_deinterleave(const _Tp* ptr, _Tpvec& a, \ + _Tpvec& b, _Tpvec& c) \ +{ vec_ld_deinterleave(ptr, a.val, b.val, c.val); } \ +inline void v_load_deinterleave(const _Tp* ptr, _Tpvec& a, _Tpvec& b, \ + _Tpvec& c, _Tpvec& d) \ +{ vec_ld_deinterleave(ptr, a.val, b.val, c.val, d.val); } \ +inline void v_store_interleave(_Tp* ptr, const _Tpvec& a, const _Tpvec& b, \ + hal::StoreMode /*mode*/=hal::STORE_UNALIGNED) \ +{ vec_st_interleave(a.val, b.val, ptr); } \ +inline void v_store_interleave(_Tp* ptr, const _Tpvec& a, \ + const _Tpvec& b, const _Tpvec& c, \ + hal::StoreMode /*mode*/=hal::STORE_UNALIGNED) \ +{ vec_st_interleave(a.val, b.val, c.val, ptr); } \ +inline void v_store_interleave(_Tp* ptr, const _Tpvec& a, const _Tpvec& b, \ + const _Tpvec& c, const _Tpvec& d, \ + hal::StoreMode /*mode*/=hal::STORE_UNALIGNED) \ +{ vec_st_interleave(a.val, b.val, c.val, d.val, ptr); } + +OPENCV_HAL_IMPL_VSX_INTERLEAVE(uchar, v_uint8x16) +OPENCV_HAL_IMPL_VSX_INTERLEAVE(schar, v_int8x16) +OPENCV_HAL_IMPL_VSX_INTERLEAVE(ushort, v_uint16x8) +OPENCV_HAL_IMPL_VSX_INTERLEAVE(short, v_int16x8) +OPENCV_HAL_IMPL_VSX_INTERLEAVE(uint, v_uint32x4) +OPENCV_HAL_IMPL_VSX_INTERLEAVE(int, v_int32x4) +OPENCV_HAL_IMPL_VSX_INTERLEAVE(float, v_float32x4) +OPENCV_HAL_IMPL_VSX_INTERLEAVE(double, v_float64x2) +OPENCV_HAL_IMPL_VSX_INTERLEAVE(int64, v_int64x2) +OPENCV_HAL_IMPL_VSX_INTERLEAVE(uint64, v_uint64x2) + +/* Expand */ +#define OPENCV_HAL_IMPL_VSX_EXPAND(_Tpvec, _Tpwvec, _Tp, fl, fh) \ +inline void v_expand(const _Tpvec& a, _Tpwvec& b0, _Tpwvec& b1) \ +{ \ + b0.val = fh(a.val); \ + b1.val = fl(a.val); \ +} \ +inline _Tpwvec v_expand_low(const _Tpvec& a) \ +{ return _Tpwvec(fh(a.val)); } \ +inline _Tpwvec v_expand_high(const _Tpvec& a) \ +{ return _Tpwvec(fl(a.val)); } \ +inline _Tpwvec v_load_expand(const _Tp* ptr) \ +{ return _Tpwvec(fh(vec_ld_l8(ptr))); } + +OPENCV_HAL_IMPL_VSX_EXPAND(v_uint8x16, v_uint16x8, uchar, vec_unpacklu, vec_unpackhu) +OPENCV_HAL_IMPL_VSX_EXPAND(v_int8x16, v_int16x8, schar, vec_unpackl, vec_unpackh) +OPENCV_HAL_IMPL_VSX_EXPAND(v_uint16x8, v_uint32x4, ushort, vec_unpacklu, vec_unpackhu) +OPENCV_HAL_IMPL_VSX_EXPAND(v_int16x8, v_int32x4, short, vec_unpackl, vec_unpackh) +OPENCV_HAL_IMPL_VSX_EXPAND(v_uint32x4, v_uint64x2, uint, vec_unpacklu, vec_unpackhu) +OPENCV_HAL_IMPL_VSX_EXPAND(v_int32x4, v_int64x2, int, vec_unpackl, vec_unpackh) + +/* Load and zero expand a 4 byte value into the second dword, first is don't care. */ +#if !defined(CV_COMPILER_VSX_BROKEN_ASM) + #define _LXSIWZX(out, ptr, T) __asm__ ("lxsiwzx %x0, 0, %1\r\n" : "=wa"(out) : "r" (ptr) : "memory"); +#else + /* This is compiler-agnostic, but will introduce an unneeded splat on the critical path. */ + #define _LXSIWZX(out, ptr, T) out = (T)vec_udword2_sp(*(uint32_t*)(ptr)); +#endif + +inline v_uint32x4 v_load_expand_q(const uchar* ptr) +{ + // Zero-extend the extra 24B instead of unpacking. Usually faster in small kernel + // Likewise note, value is zero extended and upper 4 bytes are zero'ed. + vec_uchar16 pmu = {8, 12, 12, 12, 9, 12, 12, 12, 10, 12, 12, 12, 11, 12, 12, 12}; + vec_uchar16 out; + + _LXSIWZX(out, ptr, vec_uchar16); + out = vec_perm(out, out, pmu); + return v_uint32x4((vec_uint4)out); +} + +inline v_int32x4 v_load_expand_q(const schar* ptr) +{ + vec_char16 out; + vec_short8 outs; + vec_int4 outw; + + _LXSIWZX(out, ptr, vec_char16); + outs = vec_unpackl(out); + outw = vec_unpackh(outs); + return v_int32x4(outw); +} + +/* pack */ +#define OPENCV_HAL_IMPL_VSX_PACK(_Tpvec, _Tp, _Tpwvec, _Tpvn, _Tpdel, sfnc, pkfnc, addfnc, pack) \ +inline _Tpvec v_##pack(const _Tpwvec& a, const _Tpwvec& b) \ +{ \ + return _Tpvec(pkfnc(a.val, b.val)); \ +} \ +inline void v_##pack##_store(_Tp* ptr, const _Tpwvec& a) \ +{ \ + vec_st_l8(pkfnc(a.val, a.val), ptr); \ +} \ +template \ +inline _Tpvec v_rshr_##pack(const _Tpwvec& a, const _Tpwvec& b) \ +{ \ + const __vector _Tpvn vn = vec_splats((_Tpvn)n); \ + const __vector _Tpdel delta = vec_splats((_Tpdel)((_Tpdel)1 << (n-1))); \ + return _Tpvec(pkfnc(sfnc(addfnc(a.val, delta), vn), sfnc(addfnc(b.val, delta), vn))); \ +} \ +template \ +inline void v_rshr_##pack##_store(_Tp* ptr, const _Tpwvec& a) \ +{ \ + const __vector _Tpvn vn = vec_splats((_Tpvn)n); \ + const __vector _Tpdel delta = vec_splats((_Tpdel)((_Tpdel)1 << (n-1))); \ + vec_st_l8(pkfnc(sfnc(addfnc(a.val, delta), vn), delta), ptr); \ +} + +OPENCV_HAL_IMPL_VSX_PACK(v_uint8x16, uchar, v_uint16x8, unsigned short, unsigned short, + vec_sr, vec_packs, vec_adds, pack) +OPENCV_HAL_IMPL_VSX_PACK(v_int8x16, schar, v_int16x8, unsigned short, short, + vec_sra, vec_packs, vec_adds, pack) + +OPENCV_HAL_IMPL_VSX_PACK(v_uint16x8, ushort, v_uint32x4, unsigned int, unsigned int, + vec_sr, vec_packs, vec_add, pack) +OPENCV_HAL_IMPL_VSX_PACK(v_int16x8, short, v_int32x4, unsigned int, int, + vec_sra, vec_packs, vec_add, pack) + +OPENCV_HAL_IMPL_VSX_PACK(v_uint32x4, uint, v_uint64x2, unsigned long long, unsigned long long, + vec_sr, vec_pack, vec_add, pack) +OPENCV_HAL_IMPL_VSX_PACK(v_int32x4, int, v_int64x2, unsigned long long, long long, + vec_sra, vec_pack, vec_add, pack) + +OPENCV_HAL_IMPL_VSX_PACK(v_uint8x16, uchar, v_int16x8, unsigned short, short, + vec_sra, vec_packsu, vec_adds, pack_u) +OPENCV_HAL_IMPL_VSX_PACK(v_uint16x8, ushort, v_int32x4, unsigned int, int, + vec_sra, vec_packsu, vec_add, pack_u) +// Following variant is not implemented on other platforms: +//OPENCV_HAL_IMPL_VSX_PACK(v_uint32x4, uint, v_int64x2, unsigned long long, long long, +// vec_sra, vec_packsu, vec_add, pack_u) + +// pack boolean +inline v_uint8x16 v_pack_b(const v_uint16x8& a, const v_uint16x8& b) +{ + vec_uchar16 ab = vec_pack(a.val, b.val); + return v_uint8x16(ab); +} + +inline v_uint8x16 v_pack_b(const v_uint32x4& a, const v_uint32x4& b, + const v_uint32x4& c, const v_uint32x4& d) +{ + vec_ushort8 ab = vec_pack(a.val, b.val); + vec_ushort8 cd = vec_pack(c.val, d.val); + return v_uint8x16(vec_pack(ab, cd)); +} + +inline v_uint8x16 v_pack_b(const v_uint64x2& a, const v_uint64x2& b, const v_uint64x2& c, + const v_uint64x2& d, const v_uint64x2& e, const v_uint64x2& f, + const v_uint64x2& g, const v_uint64x2& h) +{ + vec_uint4 ab = vec_pack(a.val, b.val); + vec_uint4 cd = vec_pack(c.val, d.val); + vec_uint4 ef = vec_pack(e.val, f.val); + vec_uint4 gh = vec_pack(g.val, h.val); + + vec_ushort8 abcd = vec_pack(ab, cd); + vec_ushort8 efgh = vec_pack(ef, gh); + return v_uint8x16(vec_pack(abcd, efgh)); +} + +/* Recombine */ +template +inline void v_zip(const _Tpvec& a0, const _Tpvec& a1, _Tpvec& b0, _Tpvec& b1) +{ + b0.val = vec_mergeh(a0.val, a1.val); + b1.val = vec_mergel(a0.val, a1.val); +} + +template +inline _Tpvec v_combine_high(const _Tpvec& a, const _Tpvec& b) +{ return _Tpvec(vec_mergesql(a.val, b.val)); } + +template +inline _Tpvec v_combine_low(const _Tpvec& a, const _Tpvec& b) +{ return _Tpvec(vec_mergesqh(a.val, b.val)); } + +template +inline void v_recombine(const _Tpvec& a, const _Tpvec& b, _Tpvec& c, _Tpvec& d) +{ + c.val = vec_mergesqh(a.val, b.val); + d.val = vec_mergesql(a.val, b.val); +} + +////////// Arithmetic, bitwise and comparison operations ///////// + +/* Element-wise binary and unary operations */ +/** Arithmetics **/ +#define OPENCV_HAL_IMPL_VSX_BIN_OP(bin_op, _Tpvec, intrin) \ +inline _Tpvec operator bin_op (const _Tpvec& a, const _Tpvec& b) \ +{ return _Tpvec(intrin(a.val, b.val)); } \ +inline _Tpvec& operator bin_op##= (_Tpvec& a, const _Tpvec& b) \ +{ a.val = intrin(a.val, b.val); return a; } + +OPENCV_HAL_IMPL_VSX_BIN_OP(+, v_uint8x16, vec_adds) +OPENCV_HAL_IMPL_VSX_BIN_OP(-, v_uint8x16, vec_subs) +OPENCV_HAL_IMPL_VSX_BIN_OP(+, v_int8x16, vec_adds) +OPENCV_HAL_IMPL_VSX_BIN_OP(-, v_int8x16, vec_subs) +OPENCV_HAL_IMPL_VSX_BIN_OP(+, v_uint16x8, vec_adds) +OPENCV_HAL_IMPL_VSX_BIN_OP(-, v_uint16x8, vec_subs) +OPENCV_HAL_IMPL_VSX_BIN_OP(+, v_int16x8, vec_adds) +OPENCV_HAL_IMPL_VSX_BIN_OP(-, v_int16x8, vec_subs) +OPENCV_HAL_IMPL_VSX_BIN_OP(+, v_uint32x4, vec_add) +OPENCV_HAL_IMPL_VSX_BIN_OP(-, v_uint32x4, vec_sub) +OPENCV_HAL_IMPL_VSX_BIN_OP(*, v_uint32x4, vec_mul) +OPENCV_HAL_IMPL_VSX_BIN_OP(+, v_int32x4, vec_add) +OPENCV_HAL_IMPL_VSX_BIN_OP(-, v_int32x4, vec_sub) +OPENCV_HAL_IMPL_VSX_BIN_OP(*, v_int32x4, vec_mul) +OPENCV_HAL_IMPL_VSX_BIN_OP(+, v_float32x4, vec_add) +OPENCV_HAL_IMPL_VSX_BIN_OP(-, v_float32x4, vec_sub) +OPENCV_HAL_IMPL_VSX_BIN_OP(*, v_float32x4, vec_mul) +OPENCV_HAL_IMPL_VSX_BIN_OP(/, v_float32x4, vec_div) +OPENCV_HAL_IMPL_VSX_BIN_OP(+, v_float64x2, vec_add) +OPENCV_HAL_IMPL_VSX_BIN_OP(-, v_float64x2, vec_sub) +OPENCV_HAL_IMPL_VSX_BIN_OP(*, v_float64x2, vec_mul) +OPENCV_HAL_IMPL_VSX_BIN_OP(/, v_float64x2, vec_div) +OPENCV_HAL_IMPL_VSX_BIN_OP(+, v_uint64x2, vec_add) +OPENCV_HAL_IMPL_VSX_BIN_OP(-, v_uint64x2, vec_sub) +OPENCV_HAL_IMPL_VSX_BIN_OP(+, v_int64x2, vec_add) +OPENCV_HAL_IMPL_VSX_BIN_OP(-, v_int64x2, vec_sub) + +// saturating multiply +#define OPENCV_HAL_IMPL_VSX_MUL_SAT(_Tpvec, _Tpwvec) \ + inline _Tpvec operator * (const _Tpvec& a, const _Tpvec& b) \ + { \ + _Tpwvec c, d; \ + v_mul_expand(a, b, c, d); \ + return v_pack(c, d); \ + } \ + inline _Tpvec& operator *= (_Tpvec& a, const _Tpvec& b) \ + { a = a * b; return a; } + +OPENCV_HAL_IMPL_VSX_MUL_SAT(v_int8x16, v_int16x8) +OPENCV_HAL_IMPL_VSX_MUL_SAT(v_uint8x16, v_uint16x8) +OPENCV_HAL_IMPL_VSX_MUL_SAT(v_int16x8, v_int32x4) +OPENCV_HAL_IMPL_VSX_MUL_SAT(v_uint16x8, v_uint32x4) + +template +inline void v_mul_expand(const Tvec& a, const Tvec& b, Twvec& c, Twvec& d) +{ + Twvec p0 = Twvec(vec_mule(a.val, b.val)); + Twvec p1 = Twvec(vec_mulo(a.val, b.val)); + v_zip(p0, p1, c, d); +} + +inline v_int16x8 v_mul_hi(const v_int16x8& a, const v_int16x8& b) +{ + vec_int4 p0 = vec_mule(a.val, b.val); + vec_int4 p1 = vec_mulo(a.val, b.val); + static const vec_uchar16 perm = {2, 3, 18, 19, 6, 7, 22, 23, 10, 11, 26, 27, 14, 15, 30, 31}; + return v_int16x8(vec_perm(vec_short8_c(p0), vec_short8_c(p1), perm)); +} +inline v_uint16x8 v_mul_hi(const v_uint16x8& a, const v_uint16x8& b) +{ + vec_uint4 p0 = vec_mule(a.val, b.val); + vec_uint4 p1 = vec_mulo(a.val, b.val); + static const vec_uchar16 perm = {2, 3, 18, 19, 6, 7, 22, 23, 10, 11, 26, 27, 14, 15, 30, 31}; + return v_uint16x8(vec_perm(vec_ushort8_c(p0), vec_ushort8_c(p1), perm)); +} + +/** Non-saturating arithmetics **/ +#define OPENCV_HAL_IMPL_VSX_BIN_FUNC(func, intrin) \ +template \ +inline _Tpvec func(const _Tpvec& a, const _Tpvec& b) \ +{ return _Tpvec(intrin(a.val, b.val)); } + +OPENCV_HAL_IMPL_VSX_BIN_FUNC(v_add_wrap, vec_add) +OPENCV_HAL_IMPL_VSX_BIN_FUNC(v_sub_wrap, vec_sub) +OPENCV_HAL_IMPL_VSX_BIN_FUNC(v_mul_wrap, vec_mul) + +/** Bitwise shifts **/ +#define OPENCV_HAL_IMPL_VSX_SHIFT_OP(_Tpvec, shr, splfunc) \ +inline _Tpvec operator << (const _Tpvec& a, int imm) \ +{ return _Tpvec(vec_sl(a.val, splfunc(imm))); } \ +inline _Tpvec operator >> (const _Tpvec& a, int imm) \ +{ return _Tpvec(shr(a.val, splfunc(imm))); } \ +template inline _Tpvec v_shl(const _Tpvec& a) \ +{ return _Tpvec(vec_sl(a.val, splfunc(imm))); } \ +template inline _Tpvec v_shr(const _Tpvec& a) \ +{ return _Tpvec(shr(a.val, splfunc(imm))); } + +OPENCV_HAL_IMPL_VSX_SHIFT_OP(v_uint8x16, vec_sr, vec_uchar16_sp) +OPENCV_HAL_IMPL_VSX_SHIFT_OP(v_uint16x8, vec_sr, vec_ushort8_sp) +OPENCV_HAL_IMPL_VSX_SHIFT_OP(v_uint32x4, vec_sr, vec_uint4_sp) +OPENCV_HAL_IMPL_VSX_SHIFT_OP(v_uint64x2, vec_sr, vec_udword2_sp) +// algebraic right shift +OPENCV_HAL_IMPL_VSX_SHIFT_OP(v_int8x16, vec_sra, vec_uchar16_sp) +OPENCV_HAL_IMPL_VSX_SHIFT_OP(v_int16x8, vec_sra, vec_ushort8_sp) +OPENCV_HAL_IMPL_VSX_SHIFT_OP(v_int32x4, vec_sra, vec_uint4_sp) +OPENCV_HAL_IMPL_VSX_SHIFT_OP(v_int64x2, vec_sra, vec_udword2_sp) + +/** Bitwise logic **/ +#define OPENCV_HAL_IMPL_VSX_LOGIC_OP(_Tpvec) \ +OPENCV_HAL_IMPL_VSX_BIN_OP(&, _Tpvec, vec_and) \ +OPENCV_HAL_IMPL_VSX_BIN_OP(|, _Tpvec, vec_or) \ +OPENCV_HAL_IMPL_VSX_BIN_OP(^, _Tpvec, vec_xor) \ +inline _Tpvec operator ~ (const _Tpvec& a) \ +{ return _Tpvec(vec_not(a.val)); } + +OPENCV_HAL_IMPL_VSX_LOGIC_OP(v_uint8x16) +OPENCV_HAL_IMPL_VSX_LOGIC_OP(v_int8x16) +OPENCV_HAL_IMPL_VSX_LOGIC_OP(v_uint16x8) +OPENCV_HAL_IMPL_VSX_LOGIC_OP(v_int16x8) +OPENCV_HAL_IMPL_VSX_LOGIC_OP(v_uint32x4) +OPENCV_HAL_IMPL_VSX_LOGIC_OP(v_int32x4) +OPENCV_HAL_IMPL_VSX_LOGIC_OP(v_uint64x2) +OPENCV_HAL_IMPL_VSX_LOGIC_OP(v_int64x2) +OPENCV_HAL_IMPL_VSX_LOGIC_OP(v_float32x4) +OPENCV_HAL_IMPL_VSX_LOGIC_OP(v_float64x2) + +/** Bitwise select **/ +#define OPENCV_HAL_IMPL_VSX_SELECT(_Tpvec, cast) \ +inline _Tpvec v_select(const _Tpvec& mask, const _Tpvec& a, const _Tpvec& b) \ +{ return _Tpvec(vec_sel(b.val, a.val, cast(mask.val))); } + +OPENCV_HAL_IMPL_VSX_SELECT(v_uint8x16, vec_bchar16_c) +OPENCV_HAL_IMPL_VSX_SELECT(v_int8x16, vec_bchar16_c) +OPENCV_HAL_IMPL_VSX_SELECT(v_uint16x8, vec_bshort8_c) +OPENCV_HAL_IMPL_VSX_SELECT(v_int16x8, vec_bshort8_c) +OPENCV_HAL_IMPL_VSX_SELECT(v_uint32x4, vec_bint4_c) +OPENCV_HAL_IMPL_VSX_SELECT(v_int32x4, vec_bint4_c) +OPENCV_HAL_IMPL_VSX_SELECT(v_float32x4, vec_bint4_c) +OPENCV_HAL_IMPL_VSX_SELECT(v_float64x2, vec_bdword2_c) + +/** Comparison **/ +#define OPENCV_HAL_IMPL_VSX_INT_CMP_OP(_Tpvec) \ +inline _Tpvec operator == (const _Tpvec& a, const _Tpvec& b) \ +{ return _Tpvec(vec_cmpeq(a.val, b.val)); } \ +inline _Tpvec operator != (const _Tpvec& a, const _Tpvec& b) \ +{ return _Tpvec(vec_cmpne(a.val, b.val)); } \ +inline _Tpvec operator < (const _Tpvec& a, const _Tpvec& b) \ +{ return _Tpvec(vec_cmplt(a.val, b.val)); } \ +inline _Tpvec operator > (const _Tpvec& a, const _Tpvec& b) \ +{ return _Tpvec(vec_cmpgt(a.val, b.val)); } \ +inline _Tpvec operator <= (const _Tpvec& a, const _Tpvec& b) \ +{ return _Tpvec(vec_cmple(a.val, b.val)); } \ +inline _Tpvec operator >= (const _Tpvec& a, const _Tpvec& b) \ +{ return _Tpvec(vec_cmpge(a.val, b.val)); } + +OPENCV_HAL_IMPL_VSX_INT_CMP_OP(v_uint8x16) +OPENCV_HAL_IMPL_VSX_INT_CMP_OP(v_int8x16) +OPENCV_HAL_IMPL_VSX_INT_CMP_OP(v_uint16x8) +OPENCV_HAL_IMPL_VSX_INT_CMP_OP(v_int16x8) +OPENCV_HAL_IMPL_VSX_INT_CMP_OP(v_uint32x4) +OPENCV_HAL_IMPL_VSX_INT_CMP_OP(v_int32x4) +OPENCV_HAL_IMPL_VSX_INT_CMP_OP(v_float32x4) +OPENCV_HAL_IMPL_VSX_INT_CMP_OP(v_float64x2) +OPENCV_HAL_IMPL_VSX_INT_CMP_OP(v_uint64x2) +OPENCV_HAL_IMPL_VSX_INT_CMP_OP(v_int64x2) + +inline v_float32x4 v_not_nan(const v_float32x4& a) +{ return v_float32x4(vec_cmpeq(a.val, a.val)); } +inline v_float64x2 v_not_nan(const v_float64x2& a) +{ return v_float64x2(vec_cmpeq(a.val, a.val)); } + +/** min/max **/ +OPENCV_HAL_IMPL_VSX_BIN_FUNC(v_min, vec_min) +OPENCV_HAL_IMPL_VSX_BIN_FUNC(v_max, vec_max) + +/** Rotate **/ +#define OPENCV_IMPL_VSX_ROTATE(_Tpvec, suffix, shf, cast) \ +template \ +inline _Tpvec v_rotate_##suffix(const _Tpvec& a) \ +{ \ + const int wd = imm * sizeof(typename _Tpvec::lane_type); \ + if (wd > 15) \ + return _Tpvec::zero(); \ + return _Tpvec((cast)shf(vec_uchar16_c(a.val), vec_uchar16_sp(wd << 3))); \ +} + +#define OPENCV_IMPL_VSX_ROTATE_LR(_Tpvec, cast) \ +OPENCV_IMPL_VSX_ROTATE(_Tpvec, left, vec_slo, cast) \ +OPENCV_IMPL_VSX_ROTATE(_Tpvec, right, vec_sro, cast) + +OPENCV_IMPL_VSX_ROTATE_LR(v_uint8x16, vec_uchar16) +OPENCV_IMPL_VSX_ROTATE_LR(v_int8x16, vec_char16) +OPENCV_IMPL_VSX_ROTATE_LR(v_uint16x8, vec_ushort8) +OPENCV_IMPL_VSX_ROTATE_LR(v_int16x8, vec_short8) +OPENCV_IMPL_VSX_ROTATE_LR(v_uint32x4, vec_uint4) +OPENCV_IMPL_VSX_ROTATE_LR(v_int32x4, vec_int4) +OPENCV_IMPL_VSX_ROTATE_LR(v_float32x4, vec_float4) +OPENCV_IMPL_VSX_ROTATE_LR(v_uint64x2, vec_udword2) +OPENCV_IMPL_VSX_ROTATE_LR(v_int64x2, vec_dword2) +OPENCV_IMPL_VSX_ROTATE_LR(v_float64x2, vec_double2) + +template +inline _Tpvec v_rotate_right(const _Tpvec& a, const _Tpvec& b) +{ + enum { CV_SHIFT = 16 - imm * (sizeof(typename _Tpvec::lane_type)) }; + if (CV_SHIFT == 16) + return a; +#ifdef __IBMCPP__ + return _Tpvec(vec_sld(b.val, a.val, CV_SHIFT & 15)); +#else + return _Tpvec(vec_sld(b.val, a.val, CV_SHIFT)); +#endif +} + +template +inline _Tpvec v_rotate_left(const _Tpvec& a, const _Tpvec& b) +{ + enum { CV_SHIFT = imm * (sizeof(typename _Tpvec::lane_type)) }; + if (CV_SHIFT == 16) + return b; + return _Tpvec(vec_sld(a.val, b.val, CV_SHIFT)); +} + +#define OPENCV_IMPL_VSX_ROTATE_64_2RG(_Tpvec, suffix, rg1, rg2) \ +template \ +inline _Tpvec v_rotate_##suffix(const _Tpvec& a, const _Tpvec& b) \ +{ \ + if (imm == 1) \ + return _Tpvec(vec_permi(rg1.val, rg2.val, 2)); \ + return imm ? b : a; \ +} + +#define OPENCV_IMPL_VSX_ROTATE_64_2RG_LR(_Tpvec) \ +OPENCV_IMPL_VSX_ROTATE_64_2RG(_Tpvec, left, b, a) \ +OPENCV_IMPL_VSX_ROTATE_64_2RG(_Tpvec, right, a, b) + +OPENCV_IMPL_VSX_ROTATE_64_2RG_LR(v_float64x2) +OPENCV_IMPL_VSX_ROTATE_64_2RG_LR(v_uint64x2) +OPENCV_IMPL_VSX_ROTATE_64_2RG_LR(v_int64x2) + +/* Reverse */ +inline v_uint8x16 v_reverse(const v_uint8x16 &a) +{ + static const vec_uchar16 perm = {15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0}; + vec_uchar16 vec = (vec_uchar16)a.val; + return v_uint8x16(vec_perm(vec, vec, perm)); +} + +inline v_int8x16 v_reverse(const v_int8x16 &a) +{ return v_reinterpret_as_s8(v_reverse(v_reinterpret_as_u8(a))); } + +inline v_uint16x8 v_reverse(const v_uint16x8 &a) +{ + static const vec_uchar16 perm = {14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1}; + vec_uchar16 vec = (vec_uchar16)a.val; + return v_reinterpret_as_u16(v_uint8x16(vec_perm(vec, vec, perm))); +} + +inline v_int16x8 v_reverse(const v_int16x8 &a) +{ return v_reinterpret_as_s16(v_reverse(v_reinterpret_as_u16(a))); } + +inline v_uint32x4 v_reverse(const v_uint32x4 &a) +{ + static const vec_uchar16 perm = {12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3}; + vec_uchar16 vec = (vec_uchar16)a.val; + return v_reinterpret_as_u32(v_uint8x16(vec_perm(vec, vec, perm))); +} + +inline v_int32x4 v_reverse(const v_int32x4 &a) +{ return v_reinterpret_as_s32(v_reverse(v_reinterpret_as_u32(a))); } + +inline v_float32x4 v_reverse(const v_float32x4 &a) +{ return v_reinterpret_as_f32(v_reverse(v_reinterpret_as_u32(a))); } + +inline v_uint64x2 v_reverse(const v_uint64x2 &a) +{ + static const vec_uchar16 perm = {8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7}; + vec_uchar16 vec = (vec_uchar16)a.val; + return v_reinterpret_as_u64(v_uint8x16(vec_perm(vec, vec, perm))); +} + +inline v_int64x2 v_reverse(const v_int64x2 &a) +{ return v_reinterpret_as_s64(v_reverse(v_reinterpret_as_u64(a))); } + +inline v_float64x2 v_reverse(const v_float64x2 &a) +{ return v_reinterpret_as_f64(v_reverse(v_reinterpret_as_u64(a))); } + +/* Extract */ +template +inline _Tpvec v_extract(const _Tpvec& a, const _Tpvec& b) +{ return v_rotate_right(a, b); } + +////////// Reduce and mask ///////// + +/** Reduce **/ +inline uint v_reduce_sum(const v_uint8x16& a) +{ + const vec_uint4 zero4 = vec_uint4_z; + vec_uint4 sum4 = vec_sum4s(a.val, zero4); + return (uint)vec_extract(vec_sums(vec_int4_c(sum4), vec_int4_c(zero4)), 3); +} +inline int v_reduce_sum(const v_int8x16& a) +{ + const vec_int4 zero4 = vec_int4_z; + vec_int4 sum4 = vec_sum4s(a.val, zero4); + return (int)vec_extract(vec_sums(sum4, zero4), 3); +} +inline int v_reduce_sum(const v_int16x8& a) +{ + const vec_int4 zero = vec_int4_z; + return saturate_cast(vec_extract(vec_sums(vec_sum4s(a.val, zero), zero), 3)); +} +inline uint v_reduce_sum(const v_uint16x8& a) +{ + const vec_int4 v4 = vec_int4_c(vec_unpackhu(vec_adds(a.val, vec_sld(a.val, a.val, 8)))); + return saturate_cast(vec_extract(vec_sums(v4, vec_int4_z), 3)); +} + +#define OPENCV_HAL_IMPL_VSX_REDUCE_OP_4(_Tpvec, _Tpvec2, scalartype, suffix, func) \ +inline scalartype v_reduce_##suffix(const _Tpvec& a) \ +{ \ + const _Tpvec2 rs = func(a.val, vec_sld(a.val, a.val, 8)); \ + return vec_extract(func(rs, vec_sld(rs, rs, 4)), 0); \ +} +OPENCV_HAL_IMPL_VSX_REDUCE_OP_4(v_uint32x4, vec_uint4, uint, sum, vec_add) +OPENCV_HAL_IMPL_VSX_REDUCE_OP_4(v_uint32x4, vec_uint4, uint, max, vec_max) +OPENCV_HAL_IMPL_VSX_REDUCE_OP_4(v_uint32x4, vec_uint4, uint, min, vec_min) +OPENCV_HAL_IMPL_VSX_REDUCE_OP_4(v_int32x4, vec_int4, int, sum, vec_add) +OPENCV_HAL_IMPL_VSX_REDUCE_OP_4(v_int32x4, vec_int4, int, max, vec_max) +OPENCV_HAL_IMPL_VSX_REDUCE_OP_4(v_int32x4, vec_int4, int, min, vec_min) +OPENCV_HAL_IMPL_VSX_REDUCE_OP_4(v_float32x4, vec_float4, float, sum, vec_add) +OPENCV_HAL_IMPL_VSX_REDUCE_OP_4(v_float32x4, vec_float4, float, max, vec_max) +OPENCV_HAL_IMPL_VSX_REDUCE_OP_4(v_float32x4, vec_float4, float, min, vec_min) + +inline uint64 v_reduce_sum(const v_uint64x2& a) +{ + return vec_extract(vec_add(a.val, vec_permi(a.val, a.val, 3)), 0); +} +inline int64 v_reduce_sum(const v_int64x2& a) +{ + return vec_extract(vec_add(a.val, vec_permi(a.val, a.val, 3)), 0); +} +inline double v_reduce_sum(const v_float64x2& a) +{ + return vec_extract(vec_add(a.val, vec_permi(a.val, a.val, 3)), 0); +} + +#define OPENCV_HAL_IMPL_VSX_REDUCE_OP_8(_Tpvec, _Tpvec2, scalartype, suffix, func) \ +inline scalartype v_reduce_##suffix(const _Tpvec& a) \ +{ \ + _Tpvec2 rs = func(a.val, vec_sld(a.val, a.val, 8)); \ + rs = func(rs, vec_sld(rs, rs, 4)); \ + return vec_extract(func(rs, vec_sld(rs, rs, 2)), 0); \ +} +OPENCV_HAL_IMPL_VSX_REDUCE_OP_8(v_uint16x8, vec_ushort8, ushort, max, vec_max) +OPENCV_HAL_IMPL_VSX_REDUCE_OP_8(v_uint16x8, vec_ushort8, ushort, min, vec_min) +OPENCV_HAL_IMPL_VSX_REDUCE_OP_8(v_int16x8, vec_short8, short, max, vec_max) +OPENCV_HAL_IMPL_VSX_REDUCE_OP_8(v_int16x8, vec_short8, short, min, vec_min) + +#define OPENCV_HAL_IMPL_VSX_REDUCE_OP_16(_Tpvec, _Tpvec2, scalartype, suffix, func) \ +inline scalartype v_reduce_##suffix(const _Tpvec& a) \ +{ \ + _Tpvec2 rs = func(a.val, vec_sld(a.val, a.val, 8)); \ + rs = func(rs, vec_sld(rs, rs, 4)); \ + rs = func(rs, vec_sld(rs, rs, 2)); \ + return vec_extract(func(rs, vec_sld(rs, rs, 1)), 0); \ +} +OPENCV_HAL_IMPL_VSX_REDUCE_OP_16(v_uint8x16, vec_uchar16, uchar, max, vec_max) +OPENCV_HAL_IMPL_VSX_REDUCE_OP_16(v_uint8x16, vec_uchar16, uchar, min, vec_min) +OPENCV_HAL_IMPL_VSX_REDUCE_OP_16(v_int8x16, vec_char16, schar, max, vec_max) +OPENCV_HAL_IMPL_VSX_REDUCE_OP_16(v_int8x16, vec_char16, schar, min, vec_min) + +inline v_float32x4 v_reduce_sum4(const v_float32x4& a, const v_float32x4& b, + const v_float32x4& c, const v_float32x4& d) +{ + vec_float4 ac = vec_add(vec_mergel(a.val, c.val), vec_mergeh(a.val, c.val)); + ac = vec_add(ac, vec_sld(ac, ac, 8)); + + vec_float4 bd = vec_add(vec_mergel(b.val, d.val), vec_mergeh(b.val, d.val)); + bd = vec_add(bd, vec_sld(bd, bd, 8)); + return v_float32x4(vec_mergeh(ac, bd)); +} + +inline unsigned v_reduce_sad(const v_uint8x16& a, const v_uint8x16& b) +{ + const vec_uint4 zero4 = vec_uint4_z; + vec_uint4 sum4 = vec_sum4s(vec_absd(a.val, b.val), zero4); + return (unsigned)vec_extract(vec_sums(vec_int4_c(sum4), vec_int4_c(zero4)), 3); +} +inline unsigned v_reduce_sad(const v_int8x16& a, const v_int8x16& b) +{ + const vec_int4 zero4 = vec_int4_z; + vec_char16 ad = vec_abss(vec_subs(a.val, b.val)); + vec_int4 sum4 = vec_sum4s(ad, zero4); + return (unsigned)vec_extract(vec_sums(sum4, zero4), 3); +} +inline unsigned v_reduce_sad(const v_uint16x8& a, const v_uint16x8& b) +{ + vec_ushort8 ad = vec_absd(a.val, b.val); + VSX_UNUSED(vec_int4) sum = vec_sums(vec_int4_c(vec_unpackhu(ad)) + vec_int4_c(vec_unpacklu(ad)), vec_int4_z); + return (unsigned)vec_extract(sum, 3); +} +inline unsigned v_reduce_sad(const v_int16x8& a, const v_int16x8& b) +{ + const vec_int4 zero4 = vec_int4_z; + vec_short8 ad = vec_abss(vec_subs(a.val, b.val)); + vec_int4 sum4 = vec_sum4s(ad, zero4); + return (unsigned)vec_extract(vec_sums(sum4, zero4), 3); +} +inline unsigned v_reduce_sad(const v_uint32x4& a, const v_uint32x4& b) +{ + const vec_uint4 ad = vec_absd(a.val, b.val); + const vec_uint4 rd = vec_add(ad, vec_sld(ad, ad, 8)); + return vec_extract(vec_add(rd, vec_sld(rd, rd, 4)), 0); +} +inline unsigned v_reduce_sad(const v_int32x4& a, const v_int32x4& b) +{ + vec_int4 ad = vec_abss(vec_sub(a.val, b.val)); + return (unsigned)vec_extract(vec_sums(ad, vec_int4_z), 3); +} +inline float v_reduce_sad(const v_float32x4& a, const v_float32x4& b) +{ + const vec_float4 ad = vec_abs(vec_sub(a.val, b.val)); + const vec_float4 rd = vec_add(ad, vec_sld(ad, ad, 8)); + return vec_extract(vec_add(rd, vec_sld(rd, rd, 4)), 0); +} + +/** Popcount **/ +inline v_uint8x16 v_popcount(const v_uint8x16& a) +{ return v_uint8x16(vec_popcntu(a.val)); } +inline v_uint8x16 v_popcount(const v_int8x16& a) +{ return v_uint8x16(vec_popcntu(a.val)); } +inline v_uint16x8 v_popcount(const v_uint16x8& a) +{ return v_uint16x8(vec_popcntu(a.val)); } +inline v_uint16x8 v_popcount(const v_int16x8& a) +{ return v_uint16x8(vec_popcntu(a.val)); } +inline v_uint32x4 v_popcount(const v_uint32x4& a) +{ return v_uint32x4(vec_popcntu(a.val)); } +inline v_uint32x4 v_popcount(const v_int32x4& a) +{ return v_uint32x4(vec_popcntu(a.val)); } +inline v_uint64x2 v_popcount(const v_uint64x2& a) +{ return v_uint64x2(vec_popcntu(a.val)); } +inline v_uint64x2 v_popcount(const v_int64x2& a) +{ return v_uint64x2(vec_popcntu(a.val)); } + +/** Mask **/ +inline int v_signmask(const v_uint8x16& a) +{ + static const vec_uchar16 qperm = {120, 112, 104, 96, 88, 80, 72, 64, 56, 48, 40, 32, 24, 16, 8, 0}; + return vec_extract((vec_int4)vec_vbpermq(v_reinterpret_as_u8(a).val, qperm), 2); +} +inline int v_signmask(const v_int8x16& a) +{ return v_signmask(v_reinterpret_as_u8(a)); } + +inline int v_signmask(const v_int16x8& a) +{ + static const vec_uchar16 qperm = {112, 96, 80, 64, 48, 32, 16, 0, 128, 128, 128, 128, 128, 128, 128, 128}; + return vec_extract((vec_int4)vec_vbpermq(v_reinterpret_as_u8(a).val, qperm), 2); +} +inline int v_signmask(const v_uint16x8& a) +{ return v_signmask(v_reinterpret_as_s16(a)); } + +inline int v_signmask(const v_int32x4& a) +{ + static const vec_uchar16 qperm = {96, 64, 32, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}; + return vec_extract((vec_int4)vec_vbpermq(v_reinterpret_as_u8(a).val, qperm), 2); +} +inline int v_signmask(const v_uint32x4& a) +{ return v_signmask(v_reinterpret_as_s32(a)); } +inline int v_signmask(const v_float32x4& a) +{ return v_signmask(v_reinterpret_as_s32(a)); } + +inline int v_signmask(const v_int64x2& a) +{ + VSX_UNUSED(const vec_dword2) sv = vec_sr(a.val, vec_udword2_sp(63)); + return (int)vec_extract(sv, 0) | (int)vec_extract(sv, 1) << 1; +} +inline int v_signmask(const v_uint64x2& a) +{ return v_signmask(v_reinterpret_as_s64(a)); } +inline int v_signmask(const v_float64x2& a) +{ return v_signmask(v_reinterpret_as_s64(a)); } + +inline int v_scan_forward(const v_int8x16& a) { return trailingZeros32(v_signmask(a)); } +inline int v_scan_forward(const v_uint8x16& a) { return trailingZeros32(v_signmask(a)); } +inline int v_scan_forward(const v_int16x8& a) { return trailingZeros32(v_signmask(a)); } +inline int v_scan_forward(const v_uint16x8& a) { return trailingZeros32(v_signmask(a)); } +inline int v_scan_forward(const v_int32x4& a) { return trailingZeros32(v_signmask(a)); } +inline int v_scan_forward(const v_uint32x4& a) { return trailingZeros32(v_signmask(a)); } +inline int v_scan_forward(const v_float32x4& a) { return trailingZeros32(v_signmask(a)); } +inline int v_scan_forward(const v_int64x2& a) { return trailingZeros32(v_signmask(a)); } +inline int v_scan_forward(const v_uint64x2& a) { return trailingZeros32(v_signmask(a)); } +inline int v_scan_forward(const v_float64x2& a) { return trailingZeros32(v_signmask(a)); } + +template +inline bool v_check_all(const _Tpvec& a) +{ return vec_all_lt(a.val, _Tpvec::zero().val); } +inline bool v_check_all(const v_uint8x16& a) +{ return v_check_all(v_reinterpret_as_s8(a)); } +inline bool v_check_all(const v_uint16x8& a) +{ return v_check_all(v_reinterpret_as_s16(a)); } +inline bool v_check_all(const v_uint32x4& a) +{ return v_check_all(v_reinterpret_as_s32(a)); } +inline bool v_check_all(const v_uint64x2& a) +{ return v_check_all(v_reinterpret_as_s64(a)); } +inline bool v_check_all(const v_float32x4& a) +{ return v_check_all(v_reinterpret_as_s32(a)); } +inline bool v_check_all(const v_float64x2& a) +{ return v_check_all(v_reinterpret_as_s64(a)); } + +template +inline bool v_check_any(const _Tpvec& a) +{ return vec_any_lt(a.val, _Tpvec::zero().val); } +inline bool v_check_any(const v_uint8x16& a) +{ return v_check_any(v_reinterpret_as_s8(a)); } +inline bool v_check_any(const v_uint16x8& a) +{ return v_check_any(v_reinterpret_as_s16(a)); } +inline bool v_check_any(const v_uint32x4& a) +{ return v_check_any(v_reinterpret_as_s32(a)); } +inline bool v_check_any(const v_uint64x2& a) +{ return v_check_any(v_reinterpret_as_s64(a)); } +inline bool v_check_any(const v_float32x4& a) +{ return v_check_any(v_reinterpret_as_s32(a)); } +inline bool v_check_any(const v_float64x2& a) +{ return v_check_any(v_reinterpret_as_s64(a)); } + +////////// Other math ///////// + +/** Some frequent operations **/ +inline v_float32x4 v_sqrt(const v_float32x4& x) +{ return v_float32x4(vec_sqrt(x.val)); } +inline v_float64x2 v_sqrt(const v_float64x2& x) +{ return v_float64x2(vec_sqrt(x.val)); } + +inline v_float32x4 v_invsqrt(const v_float32x4& x) +{ return v_float32x4(vec_rsqrt(x.val)); } +inline v_float64x2 v_invsqrt(const v_float64x2& x) +{ return v_float64x2(vec_rsqrt(x.val)); } + +#define OPENCV_HAL_IMPL_VSX_MULADD(_Tpvec) \ +inline _Tpvec v_magnitude(const _Tpvec& a, const _Tpvec& b) \ +{ return _Tpvec(vec_sqrt(vec_madd(a.val, a.val, vec_mul(b.val, b.val)))); } \ +inline _Tpvec v_sqr_magnitude(const _Tpvec& a, const _Tpvec& b) \ +{ return _Tpvec(vec_madd(a.val, a.val, vec_mul(b.val, b.val))); } \ +inline _Tpvec v_fma(const _Tpvec& a, const _Tpvec& b, const _Tpvec& c) \ +{ return _Tpvec(vec_madd(a.val, b.val, c.val)); } \ +inline _Tpvec v_muladd(const _Tpvec& a, const _Tpvec& b, const _Tpvec& c) \ +{ return _Tpvec(vec_madd(a.val, b.val, c.val)); } + +OPENCV_HAL_IMPL_VSX_MULADD(v_float32x4) +OPENCV_HAL_IMPL_VSX_MULADD(v_float64x2) + +inline v_int32x4 v_muladd(const v_int32x4& a, const v_int32x4& b, const v_int32x4& c) +{ return a * b + c; } + +// TODO: exp, log, sin, cos + +/** Absolute values **/ +inline v_uint8x16 v_abs(const v_int8x16& x) +{ return v_uint8x16(vec_uchar16_c(vec_abs(x.val))); } + +inline v_uint16x8 v_abs(const v_int16x8& x) +{ return v_uint16x8(vec_ushort8_c(vec_abs(x.val))); } + +inline v_uint32x4 v_abs(const v_int32x4& x) +{ return v_uint32x4(vec_uint4_c(vec_abs(x.val))); } + +inline v_float32x4 v_abs(const v_float32x4& x) +{ return v_float32x4(vec_abs(x.val)); } + +inline v_float64x2 v_abs(const v_float64x2& x) +{ return v_float64x2(vec_abs(x.val)); } + +/** Absolute difference **/ +// unsigned +OPENCV_HAL_IMPL_VSX_BIN_FUNC(v_absdiff, vec_absd) + +inline v_uint8x16 v_absdiff(const v_int8x16& a, const v_int8x16& b) +{ return v_reinterpret_as_u8(v_sub_wrap(v_max(a, b), v_min(a, b))); } +inline v_uint16x8 v_absdiff(const v_int16x8& a, const v_int16x8& b) +{ return v_reinterpret_as_u16(v_sub_wrap(v_max(a, b), v_min(a, b))); } +inline v_uint32x4 v_absdiff(const v_int32x4& a, const v_int32x4& b) +{ return v_reinterpret_as_u32(v_max(a, b) - v_min(a, b)); } + +inline v_float32x4 v_absdiff(const v_float32x4& a, const v_float32x4& b) +{ return v_abs(a - b); } +inline v_float64x2 v_absdiff(const v_float64x2& a, const v_float64x2& b) +{ return v_abs(a - b); } + +/** Absolute difference for signed integers **/ +inline v_int8x16 v_absdiffs(const v_int8x16& a, const v_int8x16& b) +{ return v_int8x16(vec_abss(vec_subs(a.val, b.val))); } +inline v_int16x8 v_absdiffs(const v_int16x8& a, const v_int16x8& b) +{ return v_int16x8(vec_abss(vec_subs(a.val, b.val))); } + +////////// Conversions ///////// + +/** Rounding **/ +inline v_int32x4 v_round(const v_float32x4& a) +{ return v_int32x4(vec_cts(vec_rint(a.val))); } + +inline v_int32x4 v_round(const v_float64x2& a) +{ return v_int32x4(vec_mergesqo(vec_ctso(vec_rint(a.val)), vec_int4_z)); } + +inline v_int32x4 v_round(const v_float64x2& a, const v_float64x2& b) +{ return v_int32x4(vec_mergesqo(vec_ctso(vec_rint(a.val)), vec_ctso(vec_rint(b.val)))); } + +inline v_int32x4 v_floor(const v_float32x4& a) +{ return v_int32x4(vec_cts(vec_floor(a.val))); } + +inline v_int32x4 v_floor(const v_float64x2& a) +{ return v_int32x4(vec_mergesqo(vec_ctso(vec_floor(a.val)), vec_int4_z)); } + +inline v_int32x4 v_ceil(const v_float32x4& a) +{ return v_int32x4(vec_cts(vec_ceil(a.val))); } + +inline v_int32x4 v_ceil(const v_float64x2& a) +{ return v_int32x4(vec_mergesqo(vec_ctso(vec_ceil(a.val)), vec_int4_z)); } + +inline v_int32x4 v_trunc(const v_float32x4& a) +{ return v_int32x4(vec_cts(a.val)); } + +inline v_int32x4 v_trunc(const v_float64x2& a) +{ return v_int32x4(vec_mergesqo(vec_ctso(a.val), vec_int4_z)); } + +/** To float **/ +inline v_float32x4 v_cvt_f32(const v_int32x4& a) +{ return v_float32x4(vec_ctf(a.val)); } + +inline v_float32x4 v_cvt_f32(const v_float64x2& a) +{ return v_float32x4(vec_mergesqo(vec_cvfo(a.val), vec_float4_z)); } + +inline v_float32x4 v_cvt_f32(const v_float64x2& a, const v_float64x2& b) +{ return v_float32x4(vec_mergesqo(vec_cvfo(a.val), vec_cvfo(b.val))); } + +inline v_float64x2 v_cvt_f64(const v_int32x4& a) +{ return v_float64x2(vec_ctdo(vec_mergeh(a.val, a.val))); } + +inline v_float64x2 v_cvt_f64_high(const v_int32x4& a) +{ return v_float64x2(vec_ctdo(vec_mergel(a.val, a.val))); } + +inline v_float64x2 v_cvt_f64(const v_float32x4& a) +{ return v_float64x2(vec_cvfo(vec_mergeh(a.val, a.val))); } + +inline v_float64x2 v_cvt_f64_high(const v_float32x4& a) +{ return v_float64x2(vec_cvfo(vec_mergel(a.val, a.val))); } + +inline v_float64x2 v_cvt_f64(const v_int64x2& a) +{ return v_float64x2(vec_ctd(a.val)); } + +////////////// Lookup table access //////////////////// + +inline v_int8x16 v_lut(const schar* tab, const int* idx) +{ + return v_int8x16(tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]], tab[idx[4]], tab[idx[5]], tab[idx[6]], tab[idx[7]], + tab[idx[8]], tab[idx[9]], tab[idx[10]], tab[idx[11]], tab[idx[12]], tab[idx[13]], tab[idx[14]], tab[idx[15]]); +} +inline v_int8x16 v_lut_pairs(const schar* tab, const int* idx) +{ + return v_reinterpret_as_s8(v_int16x8(*(const short*)(tab+idx[0]), *(const short*)(tab+idx[1]), *(const short*)(tab+idx[2]), *(const short*)(tab+idx[3]), + *(const short*)(tab+idx[4]), *(const short*)(tab+idx[5]), *(const short*)(tab+idx[6]), *(const short*)(tab+idx[7]))); +} +inline v_int8x16 v_lut_quads(const schar* tab, const int* idx) +{ + return v_reinterpret_as_s8(v_int32x4(*(const int*)(tab+idx[0]), *(const int*)(tab+idx[1]), *(const int*)(tab+idx[2]), *(const int*)(tab+idx[3]))); +} +inline v_uint8x16 v_lut(const uchar* tab, const int* idx) { return v_reinterpret_as_u8(v_lut((const schar*)tab, idx)); } +inline v_uint8x16 v_lut_pairs(const uchar* tab, const int* idx) { return v_reinterpret_as_u8(v_lut_pairs((const schar*)tab, idx)); } +inline v_uint8x16 v_lut_quads(const uchar* tab, const int* idx) { return v_reinterpret_as_u8(v_lut_quads((const schar*)tab, idx)); } + +inline v_int16x8 v_lut(const short* tab, const int* idx) +{ + return v_int16x8(tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]], tab[idx[4]], tab[idx[5]], tab[idx[6]], tab[idx[7]]); +} +inline v_int16x8 v_lut_pairs(const short* tab, const int* idx) +{ + return v_reinterpret_as_s16(v_int32x4(*(const int*)(tab + idx[0]), *(const int*)(tab + idx[1]), *(const int*)(tab + idx[2]), *(const int*)(tab + idx[3]))); +} +inline v_int16x8 v_lut_quads(const short* tab, const int* idx) +{ + return v_reinterpret_as_s16(v_int64x2(*(const int64*)(tab + idx[0]), *(const int64*)(tab + idx[1]))); +} +inline v_uint16x8 v_lut(const ushort* tab, const int* idx) { return v_reinterpret_as_u16(v_lut((const short*)tab, idx)); } +inline v_uint16x8 v_lut_pairs(const ushort* tab, const int* idx) { return v_reinterpret_as_u16(v_lut_pairs((const short*)tab, idx)); } +inline v_uint16x8 v_lut_quads(const ushort* tab, const int* idx) { return v_reinterpret_as_u16(v_lut_quads((const short*)tab, idx)); } + +inline v_int32x4 v_lut(const int* tab, const int* idx) +{ + return v_int32x4(tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]]); +} +inline v_int32x4 v_lut_pairs(const int* tab, const int* idx) +{ + return v_reinterpret_as_s32(v_int64x2(*(const int64*)(tab + idx[0]), *(const int64*)(tab + idx[1]))); +} +inline v_int32x4 v_lut_quads(const int* tab, const int* idx) +{ + return v_int32x4(vsx_ld(0, tab + idx[0])); +} +inline v_uint32x4 v_lut(const unsigned* tab, const int* idx) { return v_reinterpret_as_u32(v_lut((const int*)tab, idx)); } +inline v_uint32x4 v_lut_pairs(const unsigned* tab, const int* idx) { return v_reinterpret_as_u32(v_lut_pairs((const int*)tab, idx)); } +inline v_uint32x4 v_lut_quads(const unsigned* tab, const int* idx) { return v_reinterpret_as_u32(v_lut_quads((const int*)tab, idx)); } + +inline v_int64x2 v_lut(const int64_t* tab, const int* idx) +{ + return v_int64x2(tab[idx[0]], tab[idx[1]]); +} +inline v_int64x2 v_lut_pairs(const int64_t* tab, const int* idx) +{ + return v_int64x2(vsx_ld2(0, tab + idx[0])); +} +inline v_uint64x2 v_lut(const uint64_t* tab, const int* idx) { return v_reinterpret_as_u64(v_lut((const int64_t *)tab, idx)); } +inline v_uint64x2 v_lut_pairs(const uint64_t* tab, const int* idx) { return v_reinterpret_as_u64(v_lut_pairs((const int64_t *)tab, idx)); } + +inline v_float32x4 v_lut(const float* tab, const int* idx) +{ + return v_float32x4(tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]]); +} +inline v_float32x4 v_lut_pairs(const float* tab, const int* idx) { return v_reinterpret_as_f32(v_lut_pairs((const int*)tab, idx)); } +inline v_float32x4 v_lut_quads(const float* tab, const int* idx) { return v_load(tab + *idx); } + +inline v_float64x2 v_lut(const double* tab, const int* idx) +{ + return v_float64x2(tab[idx[0]], tab[idx[1]]); +} +inline v_float64x2 v_lut_pairs(const double* tab, const int* idx) { return v_load(tab + *idx); } + +inline v_int32x4 v_lut(const int* tab, const v_int32x4& idxvec) +{ + const int idx[4] = { + vec_extract(idxvec.val, 0), + vec_extract(idxvec.val, 1), + vec_extract(idxvec.val, 2), + vec_extract(idxvec.val, 3) + }; + return v_int32x4(tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]]); +} + +inline v_uint32x4 v_lut(const unsigned* tab, const v_int32x4& idxvec) +{ + const int idx[4] = { + vec_extract(idxvec.val, 0), + vec_extract(idxvec.val, 1), + vec_extract(idxvec.val, 2), + vec_extract(idxvec.val, 3) + }; + return v_uint32x4(tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]]); +} + +inline v_float32x4 v_lut(const float* tab, const v_int32x4& idxvec) +{ + const int idx[4] = { + vec_extract(idxvec.val, 0), + vec_extract(idxvec.val, 1), + vec_extract(idxvec.val, 2), + vec_extract(idxvec.val, 3) + }; + return v_float32x4(tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]]); +} + +inline v_float64x2 v_lut(const double* tab, const v_int32x4& idxvec) +{ + const int idx[2] = { + vec_extract(idxvec.val, 0), + vec_extract(idxvec.val, 1) + }; + return v_float64x2(tab[idx[0]], tab[idx[1]]); +} + +inline void v_lut_deinterleave(const float* tab, const v_int32x4& idxvec, v_float32x4& x, v_float32x4& y) +{ + vec_float4 xy0 = vec_ld_l8(tab + vec_extract(idxvec.val, 0)); + vec_float4 xy1 = vec_ld_l8(tab + vec_extract(idxvec.val, 1)); + vec_float4 xy2 = vec_ld_l8(tab + vec_extract(idxvec.val, 2)); + vec_float4 xy3 = vec_ld_l8(tab + vec_extract(idxvec.val, 3)); + vec_float4 xy02 = vec_mergeh(xy0, xy2); // x0, x2, y0, y2 + vec_float4 xy13 = vec_mergeh(xy1, xy3); // x1, x3, y1, y3 + x.val = vec_mergeh(xy02, xy13); + y.val = vec_mergel(xy02, xy13); +} +inline void v_lut_deinterleave(const double* tab, const v_int32x4& idxvec, v_float64x2& x, v_float64x2& y) +{ + vec_double2 xy0 = vsx_ld(vec_extract(idxvec.val, 0), tab); + vec_double2 xy1 = vsx_ld(vec_extract(idxvec.val, 1), tab); + x.val = vec_mergeh(xy0, xy1); + y.val = vec_mergel(xy0, xy1); +} + +inline v_int8x16 v_interleave_pairs(const v_int8x16& vec) +{ + static const vec_uchar16 perm = {0, 2, 1, 3, 4, 6, 5, 7, 8, 10, 9, 11, 12, 14, 13, 15}; + return v_int8x16(vec_perm(vec.val, vec.val, perm)); +} +inline v_uint8x16 v_interleave_pairs(const v_uint8x16& vec) +{ return v_reinterpret_as_u8(v_interleave_pairs(v_reinterpret_as_s8(vec))); } + +inline v_int8x16 v_interleave_quads(const v_int8x16& vec) +{ + static const vec_uchar16 perm = {0, 4, 1, 5, 2, 6, 3, 7, 8, 12, 9, 13, 10, 14, 11, 15}; + return v_int8x16(vec_perm(vec.val, vec.val, perm)); +} +inline v_uint8x16 v_interleave_quads(const v_uint8x16& vec) +{ return v_reinterpret_as_u8(v_interleave_quads(v_reinterpret_as_s8(vec))); } + +inline v_int16x8 v_interleave_pairs(const v_int16x8& vec) +{ + static const vec_uchar16 perm = {0,1, 4,5, 2,3, 6,7, 8,9, 12,13, 10,11, 14,15}; + return v_int16x8(vec_perm(vec.val, vec.val, perm)); +} +inline v_uint16x8 v_interleave_pairs(const v_uint16x8& vec) +{ return v_reinterpret_as_u16(v_interleave_pairs(v_reinterpret_as_s16(vec))); } + +inline v_int16x8 v_interleave_quads(const v_int16x8& vec) +{ + static const vec_uchar16 perm = {0,1, 8,9, 2,3, 10,11, 4,5, 12,13, 6,7, 14,15}; + return v_int16x8(vec_perm(vec.val, vec.val, perm)); +} +inline v_uint16x8 v_interleave_quads(const v_uint16x8& vec) +{ return v_reinterpret_as_u16(v_interleave_quads(v_reinterpret_as_s16(vec))); } + +inline v_int32x4 v_interleave_pairs(const v_int32x4& vec) +{ + static const vec_uchar16 perm = {0,1,2,3, 8,9,10,11, 4,5,6,7, 12,13,14,15}; + return v_int32x4(vec_perm(vec.val, vec.val, perm)); +} +inline v_uint32x4 v_interleave_pairs(const v_uint32x4& vec) +{ return v_reinterpret_as_u32(v_interleave_pairs(v_reinterpret_as_s32(vec))); } +inline v_float32x4 v_interleave_pairs(const v_float32x4& vec) +{ return v_reinterpret_as_f32(v_interleave_pairs(v_reinterpret_as_s32(vec))); } + +inline v_int8x16 v_pack_triplets(const v_int8x16& vec) +{ + static const vec_uchar16 perm = {0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, 15, 15, 15, 15}; + return v_int8x16(vec_perm(vec.val, vec.val, perm)); +} +inline v_uint8x16 v_pack_triplets(const v_uint8x16& vec) +{ return v_reinterpret_as_u8(v_pack_triplets(v_reinterpret_as_s8(vec))); } + +inline v_int16x8 v_pack_triplets(const v_int16x8& vec) +{ + static const vec_uchar16 perm = {0,1, 2,3, 4,5, 8,9, 10,11, 12,13, 14,15, 14,15}; + return v_int16x8(vec_perm(vec.val, vec.val, perm)); +} +inline v_uint16x8 v_pack_triplets(const v_uint16x8& vec) +{ return v_reinterpret_as_u16(v_pack_triplets(v_reinterpret_as_s16(vec))); } + +inline v_int32x4 v_pack_triplets(const v_int32x4& vec) +{ return vec; } +inline v_uint32x4 v_pack_triplets(const v_uint32x4& vec) +{ return vec; } +inline v_float32x4 v_pack_triplets(const v_float32x4& vec) +{ return vec; } + +/////// FP16 support //////// + +inline v_float32x4 v_load_expand(const float16_t* ptr) +{ + vec_ushort8 vf16 = vec_ld_l8((const ushort*)ptr); +#if CV_VSX3 && defined(vec_extract_fp_from_shorth) + return v_float32x4(vec_extract_fp_from_shorth(vf16)); +#elif CV_VSX3 && !defined(CV_COMPILER_VSX_BROKEN_ASM) + vec_float4 vf32; + __asm__ __volatile__ ("xvcvhpsp %x0,%x1" : "=wa" (vf32) : "wa" (vec_mergeh(vf16, vf16))); + return v_float32x4(vf32); +#else + const vec_int4 z = vec_int4_z, delta = vec_int4_sp(0x38000000); + const vec_int4 signmask = vec_int4_sp(0x80000000); + const vec_int4 maxexp = vec_int4_sp(0x7c000000); + const vec_float4 deltaf = vec_float4_c(vec_int4_sp(0x38800000)); + + vec_int4 bits = vec_int4_c(vec_mergeh(vec_short8_c(z), vec_short8_c(vf16))); + vec_int4 e = vec_and(bits, maxexp), sign = vec_and(bits, signmask); + vec_int4 t = vec_add(vec_sr(vec_xor(bits, sign), vec_uint4_sp(3)), delta); // ((h & 0x7fff) << 13) + delta + vec_int4 zt = vec_int4_c(vec_sub(vec_float4_c(vec_add(t, vec_int4_sp(1 << 23))), deltaf)); + + t = vec_add(t, vec_and(delta, vec_cmpeq(maxexp, e))); + vec_bint4 zmask = vec_cmpeq(e, z); + vec_int4 ft = vec_sel(t, zt, zmask); + return v_float32x4(vec_float4_c(vec_or(ft, sign))); +#endif +} + +inline void v_pack_store(float16_t* ptr, const v_float32x4& v) +{ +// fixme: Is there any builtin op or intrinsic that cover "xvcvsphp"? +#if CV_VSX3 && !defined(CV_COMPILER_VSX_BROKEN_ASM) + vec_ushort8 vf16; + __asm__ __volatile__ ("xvcvsphp %x0,%x1" : "=wa" (vf16) : "wa" (v.val)); + vec_st_l8(vec_mergesqe(vf16, vf16), ptr); +#else + const vec_int4 signmask = vec_int4_sp(0x80000000); + const vec_int4 rval = vec_int4_sp(0x3f000000); + + vec_int4 t = vec_int4_c(v.val); + vec_int4 sign = vec_sra(vec_and(t, signmask), vec_uint4_sp(16)); + t = vec_and(vec_nor(signmask, signmask), t); + + vec_bint4 finitemask = vec_cmpgt(vec_int4_sp(0x47800000), t); + vec_bint4 isnan = vec_cmpgt(t, vec_int4_sp(0x7f800000)); + vec_int4 naninf = vec_sel(vec_int4_sp(0x7c00), vec_int4_sp(0x7e00), isnan); + vec_bint4 tinymask = vec_cmpgt(vec_int4_sp(0x38800000), t); + vec_int4 tt = vec_int4_c(vec_add(vec_float4_c(t), vec_float4_c(rval))); + tt = vec_sub(tt, rval); + vec_int4 odd = vec_and(vec_sr(t, vec_uint4_sp(13)), vec_int4_sp(1)); + vec_int4 nt = vec_add(t, vec_int4_sp(0xc8000fff)); + nt = vec_sr(vec_add(nt, odd), vec_uint4_sp(13)); + t = vec_sel(nt, tt, tinymask); + t = vec_sel(naninf, t, finitemask); + t = vec_or(t, sign); + vec_st_l8(vec_packs(t, t), ptr); +#endif +} + +inline void v_cleanup() {} + + +/** Reinterpret **/ +/** its up there with load and store operations **/ + +////////// Matrix operations ///////// + +//////// Dot Product //////// +// 16 >> 32 +inline v_int32x4 v_dotprod(const v_int16x8& a, const v_int16x8& b) +{ return v_int32x4(vec_msum(a.val, b.val, vec_int4_z)); } +inline v_int32x4 v_dotprod(const v_int16x8& a, const v_int16x8& b, const v_int32x4& c) +{ return v_int32x4(vec_msum(a.val, b.val, c.val)); } + +// 32 >> 64 +inline v_int64x2 v_dotprod(const v_int32x4& a, const v_int32x4& b) +{ + vec_dword2 even = vec_mule(a.val, b.val); + vec_dword2 odd = vec_mulo(a.val, b.val); + return v_int64x2(vec_add(even, odd)); +} +inline v_int64x2 v_dotprod(const v_int32x4& a, const v_int32x4& b, const v_int64x2& c) +{ return v_dotprod(a, b) + c; } + +// 8 >> 32 +inline v_uint32x4 v_dotprod_expand(const v_uint8x16& a, const v_uint8x16& b, const v_uint32x4& c) +{ return v_uint32x4(vec_msum(a.val, b.val, c.val)); } +inline v_uint32x4 v_dotprod_expand(const v_uint8x16& a, const v_uint8x16& b) +{ return v_uint32x4(vec_msum(a.val, b.val, vec_uint4_z)); } + +inline v_int32x4 v_dotprod_expand(const v_int8x16& a, const v_int8x16& b) +{ + const vec_ushort8 eight = vec_ushort8_sp(8); + vec_short8 a0 = vec_sra((vec_short8)vec_sld(a.val, a.val, 1), eight); // even + vec_short8 a1 = vec_sra((vec_short8)a.val, eight); // odd + vec_short8 b0 = vec_sra((vec_short8)vec_sld(b.val, b.val, 1), eight); + vec_short8 b1 = vec_sra((vec_short8)b.val, eight); + return v_int32x4(vec_msum(a0, b0, vec_msum(a1, b1, vec_int4_z))); +} + +inline v_int32x4 v_dotprod_expand(const v_int8x16& a, const v_int8x16& b, const v_int32x4& c) +{ + const vec_ushort8 eight = vec_ushort8_sp(8); + vec_short8 a0 = vec_sra((vec_short8)vec_sld(a.val, a.val, 1), eight); // even + vec_short8 a1 = vec_sra((vec_short8)a.val, eight); // odd + vec_short8 b0 = vec_sra((vec_short8)vec_sld(b.val, b.val, 1), eight); + vec_short8 b1 = vec_sra((vec_short8)b.val, eight); + return v_int32x4(vec_msum(a0, b0, vec_msum(a1, b1, c.val))); +} + +// 16 >> 64 +inline v_uint64x2 v_dotprod_expand(const v_uint16x8& a, const v_uint16x8& b) +{ + const vec_uint4 zero = vec_uint4_z; + vec_uint4 even = vec_mule(a.val, b.val); + vec_uint4 odd = vec_mulo(a.val, b.val); + vec_udword2 e0 = (vec_udword2)vec_mergee(even, zero); + vec_udword2 e1 = (vec_udword2)vec_mergeo(even, zero); + vec_udword2 o0 = (vec_udword2)vec_mergee(odd, zero); + vec_udword2 o1 = (vec_udword2)vec_mergeo(odd, zero); + vec_udword2 s0 = vec_add(e0, o0); + vec_udword2 s1 = vec_add(e1, o1); + return v_uint64x2(vec_add(s0, s1)); +} +inline v_uint64x2 v_dotprod_expand(const v_uint16x8& a, const v_uint16x8& b, const v_uint64x2& c) +{ return v_dotprod_expand(a, b) + c; } + +inline v_int64x2 v_dotprod_expand(const v_int16x8& a, const v_int16x8& b) +{ + v_int32x4 prod = v_dotprod(a, b); + v_int64x2 c, d; + v_expand(prod, c, d); + return v_int64x2(vec_add(vec_mergeh(c.val, d.val), vec_mergel(c.val, d.val))); +} +inline v_int64x2 v_dotprod_expand(const v_int16x8& a, const v_int16x8& b, const v_int64x2& c) +{ return v_dotprod_expand(a, b) + c; } + +// 32 >> 64f +inline v_float64x2 v_dotprod_expand(const v_int32x4& a, const v_int32x4& b) +{ return v_cvt_f64(v_dotprod(a, b)); } +inline v_float64x2 v_dotprod_expand(const v_int32x4& a, const v_int32x4& b, const v_float64x2& c) +{ return v_dotprod_expand(a, b) + c; } + +//////// Fast Dot Product //////// + +// 16 >> 32 +inline v_int32x4 v_dotprod_fast(const v_int16x8& a, const v_int16x8& b) +{ return v_dotprod(a, b); } +inline v_int32x4 v_dotprod_fast(const v_int16x8& a, const v_int16x8& b, const v_int32x4& c) +{ return v_int32x4(vec_msum(a.val, b.val, vec_int4_z)) + c; } +// 32 >> 64 +inline v_int64x2 v_dotprod_fast(const v_int32x4& a, const v_int32x4& b) +{ return v_dotprod(a, b); } +inline v_int64x2 v_dotprod_fast(const v_int32x4& a, const v_int32x4& b, const v_int64x2& c) +{ return v_dotprod(a, b, c); } + +// 8 >> 32 +inline v_uint32x4 v_dotprod_expand_fast(const v_uint8x16& a, const v_uint8x16& b) +{ return v_dotprod_expand(a, b); } +inline v_uint32x4 v_dotprod_expand_fast(const v_uint8x16& a, const v_uint8x16& b, const v_uint32x4& c) +{ return v_uint32x4(vec_msum(a.val, b.val, vec_uint4_z)) + c; } + +inline v_int32x4 v_dotprod_expand_fast(const v_int8x16& a, const v_int8x16& b) +{ + vec_short8 a0 = vec_unpackh(a.val); + vec_short8 a1 = vec_unpackl(a.val); + vec_short8 b0 = vec_unpackh(b.val); + vec_short8 b1 = vec_unpackl(b.val); + return v_int32x4(vec_msum(a0, b0, vec_msum(a1, b1, vec_int4_z))); +} +inline v_int32x4 v_dotprod_expand_fast(const v_int8x16& a, const v_int8x16& b, const v_int32x4& c) +{ return v_dotprod_expand_fast(a, b) + c; } + +// 16 >> 64 +inline v_uint64x2 v_dotprod_expand_fast(const v_uint16x8& a, const v_uint16x8& b) +{ return v_dotprod_expand(a, b); } +inline v_uint64x2 v_dotprod_expand_fast(const v_uint16x8& a, const v_uint16x8& b, const v_uint64x2& c) +{ return v_dotprod_expand(a, b, c); } + +inline v_int64x2 v_dotprod_expand_fast(const v_int16x8& a, const v_int16x8& b) +{ + v_int32x4 prod = v_dotprod(a, b); + v_int64x2 c, d; + v_expand(prod, c, d); + return c + d; +} +inline v_int64x2 v_dotprod_expand_fast(const v_int16x8& a, const v_int16x8& b, const v_int64x2& c) +{ return v_dotprod_expand_fast(a, b) + c; } + +// 32 >> 64f +inline v_float64x2 v_dotprod_expand_fast(const v_int32x4& a, const v_int32x4& b) +{ return v_dotprod_expand(a, b); } +inline v_float64x2 v_dotprod_expand_fast(const v_int32x4& a, const v_int32x4& b, const v_float64x2& c) +{ return v_dotprod_expand(a, b, c); } + +inline v_float32x4 v_matmul(const v_float32x4& v, const v_float32x4& m0, + const v_float32x4& m1, const v_float32x4& m2, + const v_float32x4& m3) +{ + const vec_float4 v0 = vec_splat(v.val, 0); + const vec_float4 v1 = vec_splat(v.val, 1); + const vec_float4 v2 = vec_splat(v.val, 2); + VSX_UNUSED(const vec_float4) v3 = vec_splat(v.val, 3); + return v_float32x4(vec_madd(v0, m0.val, vec_madd(v1, m1.val, vec_madd(v2, m2.val, vec_mul(v3, m3.val))))); +} + +inline v_float32x4 v_matmuladd(const v_float32x4& v, const v_float32x4& m0, + const v_float32x4& m1, const v_float32x4& m2, + const v_float32x4& a) +{ + const vec_float4 v0 = vec_splat(v.val, 0); + const vec_float4 v1 = vec_splat(v.val, 1); + const vec_float4 v2 = vec_splat(v.val, 2); + return v_float32x4(vec_madd(v0, m0.val, vec_madd(v1, m1.val, vec_madd(v2, m2.val, a.val)))); +} + +#define OPENCV_HAL_IMPL_VSX_TRANSPOSE4x4(_Tpvec, _Tpvec2) \ +inline void v_transpose4x4(const _Tpvec& a0, const _Tpvec& a1, \ + const _Tpvec& a2, const _Tpvec& a3, \ + _Tpvec& b0, _Tpvec& b1, _Tpvec& b2, _Tpvec& b3) \ +{ \ + _Tpvec2 a02 = vec_mergeh(a0.val, a2.val); \ + _Tpvec2 a13 = vec_mergeh(a1.val, a3.val); \ + b0.val = vec_mergeh(a02, a13); \ + b1.val = vec_mergel(a02, a13); \ + a02 = vec_mergel(a0.val, a2.val); \ + a13 = vec_mergel(a1.val, a3.val); \ + b2.val = vec_mergeh(a02, a13); \ + b3.val = vec_mergel(a02, a13); \ +} +OPENCV_HAL_IMPL_VSX_TRANSPOSE4x4(v_uint32x4, vec_uint4) +OPENCV_HAL_IMPL_VSX_TRANSPOSE4x4(v_int32x4, vec_int4) +OPENCV_HAL_IMPL_VSX_TRANSPOSE4x4(v_float32x4, vec_float4) + +template +inline Tvec v_broadcast_element(const Tvec& v) +{ return Tvec(vec_splat(v.val, i)); } + + +CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END + +//! @endcond + +} + +#endif // OPENCV_HAL_VSX_HPP diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/hal/intrin_wasm.hpp b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/hal/intrin_wasm.hpp new file mode 100644 index 0000000..22c4a34 --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/hal/intrin_wasm.hpp @@ -0,0 +1,4083 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#ifndef OPENCV_HAL_INTRIN_WASM_HPP +#define OPENCV_HAL_INTRIN_WASM_HPP + +#include +#include +#include +#include "opencv2/core/saturate.hpp" + +#define CV_SIMD128 1 +#define CV_SIMD128_64F 0 // Now all implementation of f64 use fallback, so disable it. +#define CV_SIMD128_FP16 0 + +namespace cv +{ + +//! @cond IGNORED + +CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN + +#if (__EMSCRIPTEN_major__ * 1000000 + __EMSCRIPTEN_minor__ * 1000 + __EMSCRIPTEN_tiny__) < (1038046) +// handle renames: https://github.com/emscripten-core/emscripten/pull/9440 (https://github.com/emscripten-core/emscripten/commit/755d5b46cb84d0aa120c10981b11d05646c29673) +#define wasm_i32x4_trunc_saturate_f32x4 wasm_trunc_saturate_i32x4_f32x4 +#define wasm_u32x4_trunc_saturate_f32x4 wasm_trunc_saturate_u32x4_f32x4 +#define wasm_i64x2_trunc_saturate_f64x2 wasm_trunc_saturate_i64x2_f64x2 +#define wasm_u64x2_trunc_saturate_f64x2 wasm_trunc_saturate_u64x2_f64x2 +#define wasm_f32x4_convert_i32x4 wasm_convert_f32x4_i32x4 +#define wasm_f32x4_convert_u32x4 wasm_convert_f32x4_u32x4 +#define wasm_f64x2_convert_i64x2 wasm_convert_f64x2_i64x2 +#define wasm_f64x2_convert_u64x2 wasm_convert_f64x2_u64x2 +#endif // COMPATIBILITY: <1.38.46 + +///////// Types /////////// + +struct v_uint8x16 +{ + typedef uchar lane_type; + typedef v128_t vector_type; + enum { nlanes = 16 }; + + v_uint8x16() {} + explicit v_uint8x16(v128_t v) : val(v) {} + v_uint8x16(uchar v0, uchar v1, uchar v2, uchar v3, uchar v4, uchar v5, uchar v6, uchar v7, + uchar v8, uchar v9, uchar v10, uchar v11, uchar v12, uchar v13, uchar v14, uchar v15) + { + uchar v[] = {v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15}; + val = wasm_v128_load(v); + } + + uchar get0() const + { + return (uchar)wasm_i8x16_extract_lane(val, 0); + } + + v128_t val; +}; + +struct v_int8x16 +{ + typedef schar lane_type; + typedef v128_t vector_type; + enum { nlanes = 16 }; + + v_int8x16() {} + explicit v_int8x16(v128_t v) : val(v) {} + v_int8x16(schar v0, schar v1, schar v2, schar v3, schar v4, schar v5, schar v6, schar v7, + schar v8, schar v9, schar v10, schar v11, schar v12, schar v13, schar v14, schar v15) + { + schar v[] = {v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15}; + val = wasm_v128_load(v); + } + + schar get0() const + { + return wasm_i8x16_extract_lane(val, 0); + } + + v128_t val; +}; + +struct v_uint16x8 +{ + typedef ushort lane_type; + typedef v128_t vector_type; + enum { nlanes = 8 }; + + v_uint16x8() {} + explicit v_uint16x8(v128_t v) : val(v) {} + v_uint16x8(ushort v0, ushort v1, ushort v2, ushort v3, ushort v4, ushort v5, ushort v6, ushort v7) + { + ushort v[] = {v0, v1, v2, v3, v4, v5, v6, v7}; + val = wasm_v128_load(v); + } + + ushort get0() const + { + return (ushort)wasm_i16x8_extract_lane(val, 0); // wasm_u16x8_extract_lane() unimplemented yet + } + + v128_t val; +}; + +struct v_int16x8 +{ + typedef short lane_type; + typedef v128_t vector_type; + enum { nlanes = 8 }; + + v_int16x8() {} + explicit v_int16x8(v128_t v) : val(v) {} + v_int16x8(short v0, short v1, short v2, short v3, short v4, short v5, short v6, short v7) + { + short v[] = {v0, v1, v2, v3, v4, v5, v6, v7}; + val = wasm_v128_load(v); + } + + short get0() const + { + return wasm_i16x8_extract_lane(val, 0); + } + + v128_t val; +}; + +struct v_uint32x4 +{ + typedef unsigned lane_type; + typedef v128_t vector_type; + enum { nlanes = 4 }; + + v_uint32x4() {} + explicit v_uint32x4(v128_t v) : val(v) {} + v_uint32x4(unsigned v0, unsigned v1, unsigned v2, unsigned v3) + { + unsigned v[] = {v0, v1, v2, v3}; + val = wasm_v128_load(v); + } + + unsigned get0() const + { + return (unsigned)wasm_i32x4_extract_lane(val, 0); + } + + v128_t val; +}; + +struct v_int32x4 +{ + typedef int lane_type; + typedef v128_t vector_type; + enum { nlanes = 4 }; + + v_int32x4() {} + explicit v_int32x4(v128_t v) : val(v) {} + v_int32x4(int v0, int v1, int v2, int v3) + { + int v[] = {v0, v1, v2, v3}; + val = wasm_v128_load(v); + } + + int get0() const + { + return wasm_i32x4_extract_lane(val, 0); + } + + v128_t val; +}; + +struct v_float32x4 +{ + typedef float lane_type; + typedef v128_t vector_type; + enum { nlanes = 4 }; + + v_float32x4() {} + explicit v_float32x4(v128_t v) : val(v) {} + v_float32x4(float v0, float v1, float v2, float v3) + { + float v[] = {v0, v1, v2, v3}; + val = wasm_v128_load(v); + } + + float get0() const + { + return wasm_f32x4_extract_lane(val, 0); + } + + v128_t val; +}; + +struct v_uint64x2 +{ + typedef uint64 lane_type; + typedef v128_t vector_type; + enum { nlanes = 2 }; + + v_uint64x2() {} + explicit v_uint64x2(v128_t v) : val(v) {} + v_uint64x2(uint64 v0, uint64 v1) + { + uint64 v[] = {v0, v1}; + val = wasm_v128_load(v); + } + + uint64 get0() const + { + return (uint64)wasm_i64x2_extract_lane(val, 0); + } + + v128_t val; +}; + +struct v_int64x2 +{ + typedef int64 lane_type; + typedef v128_t vector_type; + enum { nlanes = 2 }; + + v_int64x2() {} + explicit v_int64x2(v128_t v) : val(v) {} + v_int64x2(int64 v0, int64 v1) + { + int64 v[] = {v0, v1}; + val = wasm_v128_load(v); + } + + int64 get0() const + { + return wasm_i64x2_extract_lane(val, 0); + } + + v128_t val; +}; + +struct v_float64x2 +{ + typedef double lane_type; + typedef v128_t vector_type; + enum { nlanes = 2 }; + + v_float64x2() {} + explicit v_float64x2(v128_t v) : val(v) {} + v_float64x2(double v0, double v1) + { + double v[] = {v0, v1}; + val = wasm_v128_load(v); + } + + double get0() const + { + return wasm_f64x2_extract_lane(val, 0); + } + + v128_t val; +}; + +namespace fallback +{ + +template struct v_reg +{ + typedef _Tp lane_type; + enum { nlanes = n }; + + explicit v_reg(const _Tp* ptr) { for( int i = 0; i < n; i++ ) s[i] = ptr[i]; } + + v_reg(_Tp s0, _Tp s1) { s[0] = s0; s[1] = s1; } + + v_reg(_Tp s0, _Tp s1, _Tp s2, _Tp s3) { s[0] = s0; s[1] = s1; s[2] = s2; s[3] = s3; } + + v_reg(_Tp s0, _Tp s1, _Tp s2, _Tp s3, + _Tp s4, _Tp s5, _Tp s6, _Tp s7) + { + s[0] = s0; s[1] = s1; s[2] = s2; s[3] = s3; + s[4] = s4; s[5] = s5; s[6] = s6; s[7] = s7; + } + + v_reg(_Tp s0, _Tp s1, _Tp s2, _Tp s3, + _Tp s4, _Tp s5, _Tp s6, _Tp s7, + _Tp s8, _Tp s9, _Tp s10, _Tp s11, + _Tp s12, _Tp s13, _Tp s14, _Tp s15) + { + s[0] = s0; s[1] = s1; s[2] = s2; s[3] = s3; + s[4] = s4; s[5] = s5; s[6] = s6; s[7] = s7; + s[8] = s8; s[9] = s9; s[10] = s10; s[11] = s11; + s[12] = s12; s[13] = s13; s[14] = s14; s[15] = s15; + } + + v_reg() {} + + v_reg(const v_reg<_Tp, n> & r) + { + for( int i = 0; i < n; i++ ) + s[i] = r.s[i]; + } + + _Tp get0() const { return s[0]; } + + _Tp get(const int i) const { return s[i]; } + v_reg<_Tp, n> high() const + { + v_reg<_Tp, n> c; + int i; + for( i = 0; i < n/2; i++ ) + { + c.s[i] = s[i+(n/2)]; + c.s[i+(n/2)] = 0; + } + return c; + } + + static v_reg<_Tp, n> zero() + { + v_reg<_Tp, n> c; + for( int i = 0; i < n; i++ ) + c.s[i] = (_Tp)0; + return c; + } + + static v_reg<_Tp, n> all(_Tp s) + { + v_reg<_Tp, n> c; + for( int i = 0; i < n; i++ ) + c.s[i] = s; + return c; + } + + template v_reg<_Tp2, n2> reinterpret_as() const + { + size_t bytes = std::min(sizeof(_Tp2)*n2, sizeof(_Tp)*n); + v_reg<_Tp2, n2> c; + std::memcpy(&c.s[0], &s[0], bytes); + return c; + } + + v_reg(const cv::v_uint8x16& v) { wasm_v128_store(&s, v.val); } + v_reg(const cv::v_int8x16& v) { wasm_v128_store(&s, v.val); } + v_reg(const cv::v_uint16x8& v) { wasm_v128_store(&s, v.val); } + v_reg(const cv::v_int16x8& v) { wasm_v128_store(&s, v.val); } + v_reg(const cv::v_uint32x4& v) { wasm_v128_store(&s, v.val); } + v_reg(const cv::v_int32x4& v) { wasm_v128_store(&s, v.val); } + v_reg(const cv::v_float32x4& v) { wasm_v128_store(&s, v.val); } + v_reg(const cv::v_float64x2& v) { wasm_v128_store(&s, v.val); } + v_reg(const cv::v_uint64x2& v) { wasm_v128_store(&s, v.val); } + v_reg(const cv::v_int64x2& v) { wasm_v128_store(&s, v.val); } + + operator cv::v_uint8x16() const { return cv::v_uint8x16(wasm_v128_load(&s)); } + operator cv::v_int8x16() const { return cv::v_int8x16(wasm_v128_load(&s)); } + operator cv::v_uint16x8() const { return cv::v_uint16x8(wasm_v128_load(&s)); } + operator cv::v_int16x8() const { return cv::v_int16x8(wasm_v128_load(&s)); } + operator cv::v_uint32x4() const { return cv::v_uint32x4(wasm_v128_load(&s)); } + operator cv::v_int32x4() const { return cv::v_int32x4(wasm_v128_load(&s)); } + operator cv::v_float32x4() const { return cv::v_float32x4(wasm_v128_load(&s)); } + operator cv::v_float64x2() const { return cv::v_float64x2(wasm_v128_load(&s)); } + operator cv::v_uint64x2() const { return cv::v_uint64x2(wasm_v128_load(&s)); } + operator cv::v_int64x2() const { return cv::v_int64x2(wasm_v128_load(&s)); } + + _Tp s[n]; +}; + +typedef v_reg v_uint8x16; +typedef v_reg v_int8x16; +typedef v_reg v_uint16x8; +typedef v_reg v_int16x8; +typedef v_reg v_uint32x4; +typedef v_reg v_int32x4; +typedef v_reg v_float32x4; +typedef v_reg v_float64x2; +typedef v_reg v_uint64x2; +typedef v_reg v_int64x2; + +#define OPENCV_HAL_IMPL_BIN_OP(bin_op) \ +template inline v_reg<_Tp, n> \ + operator bin_op (const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \ +{ \ + v_reg<_Tp, n> c; \ + for( int i = 0; i < n; i++ ) \ + c.s[i] = saturate_cast<_Tp>(a.s[i] bin_op b.s[i]); \ + return c; \ +} \ +template inline v_reg<_Tp, n>& \ + operator bin_op##= (v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \ +{ \ + for( int i = 0; i < n; i++ ) \ + a.s[i] = saturate_cast<_Tp>(a.s[i] bin_op b.s[i]); \ + return a; \ +} + +OPENCV_HAL_IMPL_BIN_OP(+) +OPENCV_HAL_IMPL_BIN_OP(-) +OPENCV_HAL_IMPL_BIN_OP(*) +OPENCV_HAL_IMPL_BIN_OP(/) + +#define OPENCV_HAL_IMPL_BIT_OP(bit_op) \ +template inline v_reg<_Tp, n> operator bit_op \ + (const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \ +{ \ + v_reg<_Tp, n> c; \ + typedef typename V_TypeTraits<_Tp>::int_type itype; \ + for( int i = 0; i < n; i++ ) \ + c.s[i] = V_TypeTraits<_Tp>::reinterpret_from_int((itype)(V_TypeTraits<_Tp>::reinterpret_int(a.s[i]) bit_op \ + V_TypeTraits<_Tp>::reinterpret_int(b.s[i]))); \ + return c; \ +} \ +template inline v_reg<_Tp, n>& operator \ + bit_op##= (v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \ +{ \ + typedef typename V_TypeTraits<_Tp>::int_type itype; \ + for( int i = 0; i < n; i++ ) \ + a.s[i] = V_TypeTraits<_Tp>::reinterpret_from_int((itype)(V_TypeTraits<_Tp>::reinterpret_int(a.s[i]) bit_op \ + V_TypeTraits<_Tp>::reinterpret_int(b.s[i]))); \ + return a; \ +} + +OPENCV_HAL_IMPL_BIT_OP(&) +OPENCV_HAL_IMPL_BIT_OP(|) +OPENCV_HAL_IMPL_BIT_OP(^) + +template inline v_reg<_Tp, n> operator ~ (const v_reg<_Tp, n>& a) +{ + v_reg<_Tp, n> c; + for( int i = 0; i < n; i++ ) + { + c.s[i] = V_TypeTraits<_Tp>::reinterpret_from_int(~V_TypeTraits<_Tp>::reinterpret_int(a.s[i])); + } + return c; +} + +#define OPENCV_HAL_IMPL_MATH_FUNC(func, cfunc, _Tp2) \ +template inline v_reg<_Tp2, n> func(const v_reg<_Tp, n>& a) \ +{ \ + v_reg<_Tp2, n> c; \ + for( int i = 0; i < n; i++ ) \ + c.s[i] = cfunc(a.s[i]); \ + return c; \ +} + +OPENCV_HAL_IMPL_MATH_FUNC(v_sqrt, std::sqrt, _Tp) +OPENCV_HAL_IMPL_MATH_FUNC(v_sin, std::sin, _Tp) +OPENCV_HAL_IMPL_MATH_FUNC(v_cos, std::cos, _Tp) +OPENCV_HAL_IMPL_MATH_FUNC(v_exp, std::exp, _Tp) +OPENCV_HAL_IMPL_MATH_FUNC(v_log, std::log, _Tp) +OPENCV_HAL_IMPL_MATH_FUNC(v_abs, (typename V_TypeTraits<_Tp>::abs_type)std::abs, + typename V_TypeTraits<_Tp>::abs_type) +OPENCV_HAL_IMPL_MATH_FUNC(v_round, cvRound, int) +OPENCV_HAL_IMPL_MATH_FUNC(v_floor, cvFloor, int) +OPENCV_HAL_IMPL_MATH_FUNC(v_ceil, cvCeil, int) +OPENCV_HAL_IMPL_MATH_FUNC(v_trunc, int, int) + +#define OPENCV_HAL_IMPL_MINMAX_FUNC(func, cfunc) \ +template inline v_reg<_Tp, n> func(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \ +{ \ + v_reg<_Tp, n> c; \ + for( int i = 0; i < n; i++ ) \ + c.s[i] = cfunc(a.s[i], b.s[i]); \ + return c; \ +} + +#define OPENCV_HAL_IMPL_REDUCE_MINMAX_FUNC(func, cfunc) \ +template inline _Tp func(const v_reg<_Tp, n>& a) \ +{ \ + _Tp c = a.s[0]; \ + for( int i = 1; i < n; i++ ) \ + c = cfunc(c, a.s[i]); \ + return c; \ +} + +OPENCV_HAL_IMPL_MINMAX_FUNC(v_min, std::min) +OPENCV_HAL_IMPL_MINMAX_FUNC(v_max, std::max) +OPENCV_HAL_IMPL_REDUCE_MINMAX_FUNC(v_reduce_min, std::min) +OPENCV_HAL_IMPL_REDUCE_MINMAX_FUNC(v_reduce_max, std::max) + +static const unsigned char popCountTable[] = +{ + 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8, +}; + +template +inline v_reg::abs_type, n> v_popcount(const v_reg<_Tp, n>& a) +{ + v_reg::abs_type, n> b = v_reg::abs_type, n>::zero(); + for (int i = 0; i < (int)(n*sizeof(_Tp)); i++) + b.s[i/sizeof(_Tp)] += popCountTable[v_reinterpret_as_u8(a).s[i]]; + return b; +} + +template +inline void v_minmax( const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b, + v_reg<_Tp, n>& minval, v_reg<_Tp, n>& maxval ) +{ + for( int i = 0; i < n; i++ ) + { + minval.s[i] = std::min(a.s[i], b.s[i]); + maxval.s[i] = std::max(a.s[i], b.s[i]); + } +} + +#define OPENCV_HAL_IMPL_CMP_OP(cmp_op) \ +template \ +inline v_reg<_Tp, n> operator cmp_op(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \ +{ \ + typedef typename V_TypeTraits<_Tp>::int_type itype; \ + v_reg<_Tp, n> c; \ + for( int i = 0; i < n; i++ ) \ + c.s[i] = V_TypeTraits<_Tp>::reinterpret_from_int((itype)-(int)(a.s[i] cmp_op b.s[i])); \ + return c; \ +} + +OPENCV_HAL_IMPL_CMP_OP(<) +OPENCV_HAL_IMPL_CMP_OP(>) +OPENCV_HAL_IMPL_CMP_OP(<=) +OPENCV_HAL_IMPL_CMP_OP(>=) +OPENCV_HAL_IMPL_CMP_OP(==) +OPENCV_HAL_IMPL_CMP_OP(!=) + +template +inline v_reg v_not_nan(const v_reg& a) +{ + typedef typename V_TypeTraits::int_type itype; + v_reg c; + for (int i = 0; i < n; i++) + c.s[i] = V_TypeTraits::reinterpret_from_int((itype)-(int)(a.s[i] == a.s[i])); + return c; +} +template +inline v_reg v_not_nan(const v_reg& a) +{ + typedef typename V_TypeTraits::int_type itype; + v_reg c; + for (int i = 0; i < n; i++) + c.s[i] = V_TypeTraits::reinterpret_from_int((itype)-(int)(a.s[i] == a.s[i])); + return c; +} + +#define OPENCV_HAL_IMPL_ARITHM_OP(func, bin_op, cast_op, _Tp2) \ +template \ +inline v_reg<_Tp2, n> func(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \ +{ \ + typedef _Tp2 rtype; \ + v_reg c; \ + for( int i = 0; i < n; i++ ) \ + c.s[i] = cast_op(a.s[i] bin_op b.s[i]); \ + return c; \ +} + +OPENCV_HAL_IMPL_ARITHM_OP(v_add_wrap, +, (_Tp), _Tp) +OPENCV_HAL_IMPL_ARITHM_OP(v_sub_wrap, -, (_Tp), _Tp) +OPENCV_HAL_IMPL_ARITHM_OP(v_mul_wrap, *, (_Tp), _Tp) + +template inline T _absdiff(T a, T b) +{ + return a > b ? a - b : b - a; +} + +template +inline v_reg::abs_type, n> v_absdiff(const v_reg<_Tp, n>& a, const v_reg<_Tp, n> & b) +{ + typedef typename V_TypeTraits<_Tp>::abs_type rtype; + v_reg c; + const rtype mask = (rtype)(std::numeric_limits<_Tp>::is_signed ? (1 << (sizeof(rtype)*8 - 1)) : 0); + for( int i = 0; i < n; i++ ) + { + rtype ua = a.s[i] ^ mask; + rtype ub = b.s[i] ^ mask; + c.s[i] = _absdiff(ua, ub); + } + return c; +} + +inline v_float32x4 v_absdiff(const v_float32x4& a, const v_float32x4& b) +{ + v_float32x4 c; + for( int i = 0; i < c.nlanes; i++ ) + c.s[i] = _absdiff(a.s[i], b.s[i]); + return c; +} + +inline v_float64x2 v_absdiff(const v_float64x2& a, const v_float64x2& b) +{ + v_float64x2 c; + for( int i = 0; i < c.nlanes; i++ ) + c.s[i] = _absdiff(a.s[i], b.s[i]); + return c; +} + +template +inline v_reg<_Tp, n> v_absdiffs(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) +{ + v_reg<_Tp, n> c; + for( int i = 0; i < n; i++) + c.s[i] = saturate_cast<_Tp>(std::abs(a.s[i] - b.s[i])); + return c; +} + +template +inline v_reg<_Tp, n> v_invsqrt(const v_reg<_Tp, n>& a) +{ + v_reg<_Tp, n> c; + for( int i = 0; i < n; i++ ) + c.s[i] = 1.f/std::sqrt(a.s[i]); + return c; +} + +template +inline v_reg<_Tp, n> v_magnitude(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) +{ + v_reg<_Tp, n> c; + for( int i = 0; i < n; i++ ) + c.s[i] = std::sqrt(a.s[i]*a.s[i] + b.s[i]*b.s[i]); + return c; +} + +template +inline v_reg<_Tp, n> v_sqr_magnitude(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) +{ + v_reg<_Tp, n> c; + for( int i = 0; i < n; i++ ) + c.s[i] = a.s[i]*a.s[i] + b.s[i]*b.s[i]; + return c; +} + +template +inline v_reg<_Tp, n> v_fma(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b, + const v_reg<_Tp, n>& c) +{ + v_reg<_Tp, n> d; + for( int i = 0; i < n; i++ ) + d.s[i] = a.s[i]*b.s[i] + c.s[i]; + return d; +} + +template +inline v_reg<_Tp, n> v_muladd(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b, + const v_reg<_Tp, n>& c) +{ + return v_fma(a, b, c); +} + +template inline v_reg::w_type, n/2> + v_dotprod(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) +{ + typedef typename V_TypeTraits<_Tp>::w_type w_type; + v_reg c; + for( int i = 0; i < (n/2); i++ ) + c.s[i] = (w_type)a.s[i*2]*b.s[i*2] + (w_type)a.s[i*2+1]*b.s[i*2+1]; + return c; +} + +template inline v_reg::w_type, n/2> + v_dotprod(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b, const v_reg::w_type, n / 2>& c) +{ + typedef typename V_TypeTraits<_Tp>::w_type w_type; + v_reg s; + for( int i = 0; i < (n/2); i++ ) + s.s[i] = (w_type)a.s[i*2]*b.s[i*2] + (w_type)a.s[i*2+1]*b.s[i*2+1] + c.s[i]; + return s; +} + +template inline v_reg::q_type, n/4> + v_dotprod_expand(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) +{ + typedef typename V_TypeTraits<_Tp>::q_type q_type; + v_reg s; + for( int i = 0; i < (n/4); i++ ) + s.s[i] = (q_type)a.s[i*4 ]*b.s[i*4 ] + (q_type)a.s[i*4 + 1]*b.s[i*4 + 1] + + (q_type)a.s[i*4 + 2]*b.s[i*4 + 2] + (q_type)a.s[i*4 + 3]*b.s[i*4 + 3]; + return s; +} + +template inline v_reg::q_type, n/4> + v_dotprod_expand(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b, + const v_reg::q_type, n / 4>& c) +{ + typedef typename V_TypeTraits<_Tp>::q_type q_type; + v_reg s; + for( int i = 0; i < (n/4); i++ ) + s.s[i] = (q_type)a.s[i*4 ]*b.s[i*4 ] + (q_type)a.s[i*4 + 1]*b.s[i*4 + 1] + + (q_type)a.s[i*4 + 2]*b.s[i*4 + 2] + (q_type)a.s[i*4 + 3]*b.s[i*4 + 3] + c.s[i]; + return s; +} + +template inline void v_mul_expand(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b, + v_reg::w_type, n/2>& c, + v_reg::w_type, n/2>& d) +{ + typedef typename V_TypeTraits<_Tp>::w_type w_type; + for( int i = 0; i < (n/2); i++ ) + { + c.s[i] = (w_type)a.s[i]*b.s[i]; + d.s[i] = (w_type)a.s[i+(n/2)]*b.s[i+(n/2)]; + } +} + +template inline v_reg<_Tp, n> v_mul_hi(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) +{ + typedef typename V_TypeTraits<_Tp>::w_type w_type; + v_reg<_Tp, n> c; + for (int i = 0; i < n; i++) + c.s[i] = (_Tp)(((w_type)a.s[i] * b.s[i]) >> sizeof(_Tp)*8); + return c; +} + +template inline void v_hsum(const v_reg<_Tp, n>& a, + v_reg::w_type, n/2>& c) +{ + typedef typename V_TypeTraits<_Tp>::w_type w_type; + for( int i = 0; i < (n/2); i++ ) + { + c.s[i] = (w_type)a.s[i*2] + a.s[i*2+1]; + } +} + +#define OPENCV_HAL_IMPL_SHIFT_OP(shift_op) \ +template inline v_reg<_Tp, n> operator shift_op(const v_reg<_Tp, n>& a, int imm) \ +{ \ + v_reg<_Tp, n> c; \ + for( int i = 0; i < n; i++ ) \ + c.s[i] = (_Tp)(a.s[i] shift_op imm); \ + return c; \ +} + +OPENCV_HAL_IMPL_SHIFT_OP(<< ) +OPENCV_HAL_IMPL_SHIFT_OP(>> ) + +#define OPENCV_HAL_IMPL_ROTATE_SHIFT_OP(suffix,opA,opB) \ +template inline v_reg<_Tp, n> v_rotate_##suffix(const v_reg<_Tp, n>& a) \ +{ \ + v_reg<_Tp, n> b; \ + for (int i = 0; i < n; i++) \ + { \ + int sIndex = i opA imm; \ + if (0 <= sIndex && sIndex < n) \ + { \ + b.s[i] = a.s[sIndex]; \ + } \ + else \ + { \ + b.s[i] = 0; \ + } \ + } \ + return b; \ +} \ +template inline v_reg<_Tp, n> v_rotate_##suffix(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \ +{ \ + v_reg<_Tp, n> c; \ + for (int i = 0; i < n; i++) \ + { \ + int aIndex = i opA imm; \ + int bIndex = i opA imm opB n; \ + if (0 <= bIndex && bIndex < n) \ + { \ + c.s[i] = b.s[bIndex]; \ + } \ + else if (0 <= aIndex && aIndex < n) \ + { \ + c.s[i] = a.s[aIndex]; \ + } \ + else \ + { \ + c.s[i] = 0; \ + } \ + } \ + return c; \ +} + +OPENCV_HAL_IMPL_ROTATE_SHIFT_OP(left, -, +) +OPENCV_HAL_IMPL_ROTATE_SHIFT_OP(right, +, -) + +template inline typename V_TypeTraits<_Tp>::sum_type v_reduce_sum(const v_reg<_Tp, n>& a) +{ + typename V_TypeTraits<_Tp>::sum_type c = a.s[0]; + for( int i = 1; i < n; i++ ) + c += a.s[i]; + return c; +} + +inline v_float32x4 v_reduce_sum4(const v_float32x4& a, const v_float32x4& b, + const v_float32x4& c, const v_float32x4& d) +{ + v_float32x4 r; + r.s[0] = a.s[0] + a.s[1] + a.s[2] + a.s[3]; + r.s[1] = b.s[0] + b.s[1] + b.s[2] + b.s[3]; + r.s[2] = c.s[0] + c.s[1] + c.s[2] + c.s[3]; + r.s[3] = d.s[0] + d.s[1] + d.s[2] + d.s[3]; + return r; +} + +template inline typename V_TypeTraits< typename V_TypeTraits<_Tp>::abs_type >::sum_type v_reduce_sad(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) +{ + typename V_TypeTraits< typename V_TypeTraits<_Tp>::abs_type >::sum_type c = _absdiff(a.s[0], b.s[0]); + for (int i = 1; i < n; i++) + c += _absdiff(a.s[i], b.s[i]); + return c; +} + +template inline int v_signmask(const v_reg<_Tp, n>& a) +{ + int mask = 0; + for( int i = 0; i < n; i++ ) + mask |= (V_TypeTraits<_Tp>::reinterpret_int(a.s[i]) < 0) << i; + return mask; +} + +template inline bool v_check_all(const v_reg<_Tp, n>& a) +{ + for( int i = 0; i < n; i++ ) + if( V_TypeTraits<_Tp>::reinterpret_int(a.s[i]) >= 0 ) + return false; + return true; +} + +template inline bool v_check_any(const v_reg<_Tp, n>& a) +{ + for( int i = 0; i < n; i++ ) + if( V_TypeTraits<_Tp>::reinterpret_int(a.s[i]) < 0 ) + return true; + return false; +} + +template inline v_reg<_Tp, n> v_select(const v_reg<_Tp, n>& mask, + const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) +{ + typedef V_TypeTraits<_Tp> Traits; + typedef typename Traits::int_type int_type; + v_reg<_Tp, n> c; + for( int i = 0; i < n; i++ ) + { + int_type m = Traits::reinterpret_int(mask.s[i]); + CV_DbgAssert(m == 0 || m == (~(int_type)0)); // restrict mask values: 0 or 0xff/0xffff/etc + c.s[i] = m ? a.s[i] : b.s[i]; + } + return c; +} + +template inline void v_expand(const v_reg<_Tp, n>& a, + v_reg::w_type, n/2>& b0, + v_reg::w_type, n/2>& b1) +{ + for( int i = 0; i < (n/2); i++ ) + { + b0.s[i] = a.s[i]; + b1.s[i] = a.s[i+(n/2)]; + } +} + +template +inline v_reg::w_type, n/2> +v_expand_low(const v_reg<_Tp, n>& a) +{ + v_reg::w_type, n/2> b; + for( int i = 0; i < (n/2); i++ ) + b.s[i] = a.s[i]; + return b; +} + +template +inline v_reg::w_type, n/2> +v_expand_high(const v_reg<_Tp, n>& a) +{ + v_reg::w_type, n/2> b; + for( int i = 0; i < (n/2); i++ ) + b.s[i] = a.s[i+(n/2)]; + return b; +} + +template inline v_reg::int_type, n> + v_reinterpret_as_int(const v_reg<_Tp, n>& a) +{ + v_reg::int_type, n> c; + for( int i = 0; i < n; i++ ) + c.s[i] = V_TypeTraits<_Tp>::reinterpret_int(a.s[i]); + return c; +} + +template inline v_reg::uint_type, n> + v_reinterpret_as_uint(const v_reg<_Tp, n>& a) +{ + v_reg::uint_type, n> c; + for( int i = 0; i < n; i++ ) + c.s[i] = V_TypeTraits<_Tp>::reinterpret_uint(a.s[i]); + return c; +} + +template inline void v_zip( const v_reg<_Tp, n>& a0, const v_reg<_Tp, n>& a1, + v_reg<_Tp, n>& b0, v_reg<_Tp, n>& b1 ) +{ + int i; + for( i = 0; i < n/2; i++ ) + { + b0.s[i*2] = a0.s[i]; + b0.s[i*2+1] = a1.s[i]; + } + for( ; i < n; i++ ) + { + b1.s[i*2-n] = a0.s[i]; + b1.s[i*2-n+1] = a1.s[i]; + } +} + +template +inline v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> v_load(const _Tp* ptr) +{ + return v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128>(ptr); +} + +template +inline v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> v_load_aligned(const _Tp* ptr) +{ + return v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128>(ptr); +} + +template +inline v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> v_load_low(const _Tp* ptr) +{ + v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> c; + for( int i = 0; i < c.nlanes/2; i++ ) + { + c.s[i] = ptr[i]; + } + return c; +} + +template +inline v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> v_load_halves(const _Tp* loptr, const _Tp* hiptr) +{ + v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> c; + for( int i = 0; i < c.nlanes/2; i++ ) + { + c.s[i] = loptr[i]; + c.s[i+c.nlanes/2] = hiptr[i]; + } + return c; +} + +template +inline v_reg::w_type, V_TypeTraits<_Tp>::nlanes128 / 2> +v_load_expand(const _Tp* ptr) +{ + typedef typename V_TypeTraits<_Tp>::w_type w_type; + v_reg::nlanes128> c; + for( int i = 0; i < c.nlanes; i++ ) + { + c.s[i] = ptr[i]; + } + return c; +} + +template +inline v_reg::q_type, V_TypeTraits<_Tp>::nlanes128 / 4> +v_load_expand_q(const _Tp* ptr) +{ + typedef typename V_TypeTraits<_Tp>::q_type q_type; + v_reg::nlanes128> c; + for( int i = 0; i < c.nlanes; i++ ) + { + c.s[i] = ptr[i]; + } + return c; +} + +template inline void v_load_deinterleave(const _Tp* ptr, v_reg<_Tp, n>& a, + v_reg<_Tp, n>& b) +{ + int i, i2; + for( i = i2 = 0; i < n; i++, i2 += 2 ) + { + a.s[i] = ptr[i2]; + b.s[i] = ptr[i2+1]; + } +} + +template inline void v_load_deinterleave(const _Tp* ptr, v_reg<_Tp, n>& a, + v_reg<_Tp, n>& b, v_reg<_Tp, n>& c) +{ + int i, i3; + for( i = i3 = 0; i < n; i++, i3 += 3 ) + { + a.s[i] = ptr[i3]; + b.s[i] = ptr[i3+1]; + c.s[i] = ptr[i3+2]; + } +} + +template +inline void v_load_deinterleave(const _Tp* ptr, v_reg<_Tp, n>& a, + v_reg<_Tp, n>& b, v_reg<_Tp, n>& c, + v_reg<_Tp, n>& d) +{ + int i, i4; + for( i = i4 = 0; i < n; i++, i4 += 4 ) + { + a.s[i] = ptr[i4]; + b.s[i] = ptr[i4+1]; + c.s[i] = ptr[i4+2]; + d.s[i] = ptr[i4+3]; + } +} + +template +inline void v_store_interleave( _Tp* ptr, const v_reg<_Tp, n>& a, + const v_reg<_Tp, n>& b, + hal::StoreMode /*mode*/=hal::STORE_UNALIGNED) +{ + int i, i2; + for( i = i2 = 0; i < n; i++, i2 += 2 ) + { + ptr[i2] = a.s[i]; + ptr[i2+1] = b.s[i]; + } +} + +template +inline void v_store_interleave( _Tp* ptr, const v_reg<_Tp, n>& a, + const v_reg<_Tp, n>& b, const v_reg<_Tp, n>& c, + hal::StoreMode /*mode*/=hal::STORE_UNALIGNED) +{ + int i, i3; + for( i = i3 = 0; i < n; i++, i3 += 3 ) + { + ptr[i3] = a.s[i]; + ptr[i3+1] = b.s[i]; + ptr[i3+2] = c.s[i]; + } +} + +template inline void v_store_interleave( _Tp* ptr, const v_reg<_Tp, n>& a, + const v_reg<_Tp, n>& b, const v_reg<_Tp, n>& c, + const v_reg<_Tp, n>& d, + hal::StoreMode /*mode*/=hal::STORE_UNALIGNED) +{ + int i, i4; + for( i = i4 = 0; i < n; i++, i4 += 4 ) + { + ptr[i4] = a.s[i]; + ptr[i4+1] = b.s[i]; + ptr[i4+2] = c.s[i]; + ptr[i4+3] = d.s[i]; + } +} + +template +inline void v_store(_Tp* ptr, const v_reg<_Tp, n>& a, hal::StoreMode /*mode*/ = hal::STORE_UNALIGNED) +{ + for( int i = 0; i < n; i++ ) + ptr[i] = a.s[i]; +} + +template +inline void v_store_low(_Tp* ptr, const v_reg<_Tp, n>& a) +{ + for( int i = 0; i < (n/2); i++ ) + ptr[i] = a.s[i]; +} + +template +inline void v_store_high(_Tp* ptr, const v_reg<_Tp, n>& a) +{ + for( int i = 0; i < (n/2); i++ ) + ptr[i] = a.s[i+(n/2)]; +} + +template +inline void v_store_aligned(_Tp* ptr, const v_reg<_Tp, n>& a) +{ + for( int i = 0; i < n; i++ ) + ptr[i] = a.s[i]; +} + +template +inline void v_store_aligned_nocache(_Tp* ptr, const v_reg<_Tp, n>& a) +{ + for( int i = 0; i < n; i++ ) + ptr[i] = a.s[i]; +} + +template +inline void v_store_aligned(_Tp* ptr, const v_reg<_Tp, n>& a, hal::StoreMode /*mode*/) +{ + for( int i = 0; i < n; i++ ) + ptr[i] = a.s[i]; +} + +template +inline v_reg<_Tp, n> v_combine_low(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) +{ + v_reg<_Tp, n> c; + for( int i = 0; i < (n/2); i++ ) + { + c.s[i] = a.s[i]; + c.s[i+(n/2)] = b.s[i]; + } + return c; +} + +template +inline v_reg<_Tp, n> v_combine_high(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) +{ + v_reg<_Tp, n> c; + for( int i = 0; i < (n/2); i++ ) + { + c.s[i] = a.s[i+(n/2)]; + c.s[i+(n/2)] = b.s[i+(n/2)]; + } + return c; +} + +template +inline void v_recombine(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b, + v_reg<_Tp, n>& low, v_reg<_Tp, n>& high) +{ + for( int i = 0; i < (n/2); i++ ) + { + low.s[i] = a.s[i]; + low.s[i+(n/2)] = b.s[i]; + high.s[i] = a.s[i+(n/2)]; + high.s[i+(n/2)] = b.s[i+(n/2)]; + } +} + +template +inline v_reg<_Tp, n> v_extract(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) +{ + v_reg<_Tp, n> r; + const int shift = n - s; + int i = 0; + for (; i < shift; ++i) + r.s[i] = a.s[i+s]; + for (; i < n; ++i) + r.s[i] = b.s[i-shift]; + return r; +} + +template inline v_reg v_round(const v_reg& a) +{ + v_reg c; + for( int i = 0; i < n; i++ ) + c.s[i] = cvRound(a.s[i]); + return c; +} + +template inline v_reg v_round(const v_reg& a, const v_reg& b) +{ + v_reg c; + for( int i = 0; i < n; i++ ) + { + c.s[i] = cvRound(a.s[i]); + c.s[i+n] = cvRound(b.s[i]); + } + return c; +} + +template inline v_reg v_floor(const v_reg& a) +{ + v_reg c; + for( int i = 0; i < n; i++ ) + c.s[i] = cvFloor(a.s[i]); + return c; +} + +template inline v_reg v_ceil(const v_reg& a) +{ + v_reg c; + for( int i = 0; i < n; i++ ) + c.s[i] = cvCeil(a.s[i]); + return c; +} + +template inline v_reg v_trunc(const v_reg& a) +{ + v_reg c; + for( int i = 0; i < n; i++ ) + c.s[i] = (int)(a.s[i]); + return c; +} + +template inline v_reg v_round(const v_reg& a) +{ + v_reg c; + for( int i = 0; i < n; i++ ) + { + c.s[i] = cvRound(a.s[i]); + c.s[i+n] = 0; + } + return c; +} + +template inline v_reg v_floor(const v_reg& a) +{ + v_reg c; + for( int i = 0; i < n; i++ ) + { + c.s[i] = cvFloor(a.s[i]); + c.s[i+n] = 0; + } + return c; +} + +template inline v_reg v_ceil(const v_reg& a) +{ + v_reg c; + for( int i = 0; i < n; i++ ) + { + c.s[i] = cvCeil(a.s[i]); + c.s[i+n] = 0; + } + return c; +} + +template inline v_reg v_trunc(const v_reg& a) +{ + v_reg c; + for( int i = 0; i < n; i++ ) + { + c.s[i] = (int)(a.s[i]); + c.s[i+n] = 0; + } + return c; +} + +template inline v_reg v_cvt_f32(const v_reg& a) +{ + v_reg c; + for( int i = 0; i < n; i++ ) + c.s[i] = (float)a.s[i]; + return c; +} + +template inline v_reg v_cvt_f32(const v_reg& a) +{ + v_reg c; + for( int i = 0; i < n; i++ ) + { + c.s[i] = (float)a.s[i]; + c.s[i+n] = 0; + } + return c; +} + +template inline v_reg v_cvt_f32(const v_reg& a, const v_reg& b) +{ + v_reg c; + for( int i = 0; i < n; i++ ) + { + c.s[i] = (float)a.s[i]; + c.s[i+n] = (float)b.s[i]; + } + return c; +} + +inline v_float64x2 v_cvt_f64(const v_int32x4& a) +{ + v_float64x2 c; + for( int i = 0; i < 2; i++ ) + c.s[i] = (double)a.s[i]; + return c; +} + +inline v_float64x2 v_cvt_f64_high(const v_int32x4& a) +{ + v_float64x2 c; + for( int i = 0; i < 2; i++ ) + c.s[i] = (double)a.s[i+2]; + return c; +} + +inline v_float64x2 v_cvt_f64(const v_float32x4& a) +{ + v_float64x2 c; + for( int i = 0; i < 2; i++ ) + c.s[i] = (double)a.s[i]; + return c; +} + +inline v_float64x2 v_cvt_f64_high(const v_float32x4& a) +{ + v_float64x2 c; + for( int i = 0; i < 2; i++ ) + c.s[i] = (double)a.s[i+2]; + return c; +} + +inline v_float64x2 v_cvt_f64(const v_int64x2& a) +{ + v_float64x2 c; + for( int i = 0; i < 2; i++ ) + c.s[i] = (double)a.s[i]; + return c; +} + +template inline v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> v_lut(const _Tp* tab, const int* idx) +{ + v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> c; + for (int i = 0; i < V_TypeTraits<_Tp>::nlanes128; i++) + c.s[i] = tab[idx[i]]; + return c; +} +template inline v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> v_lut_pairs(const _Tp* tab, const int* idx) +{ + v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> c; + for (int i = 0; i < V_TypeTraits<_Tp>::nlanes128; i++) + c.s[i] = tab[idx[i / 2] + i % 2]; + return c; +} +template inline v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> v_lut_quads(const _Tp* tab, const int* idx) +{ + v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> c; + for (int i = 0; i < V_TypeTraits<_Tp>::nlanes128; i++) + c.s[i] = tab[idx[i / 4] + i % 4]; + return c; +} + +template inline v_reg v_lut(const int* tab, const v_reg& idx) +{ + v_reg c; + for( int i = 0; i < n; i++ ) + c.s[i] = tab[idx.s[i]]; + return c; +} + +template inline v_reg v_lut(const unsigned* tab, const v_reg& idx) +{ + v_reg c; + for (int i = 0; i < n; i++) + c.s[i] = tab[idx.s[i]]; + return c; +} + +template inline v_reg v_lut(const float* tab, const v_reg& idx) +{ + v_reg c; + for( int i = 0; i < n; i++ ) + c.s[i] = tab[idx.s[i]]; + return c; +} + +template inline v_reg v_lut(const double* tab, const v_reg& idx) +{ + v_reg c; + for( int i = 0; i < n; i++ ) + c.s[i] = tab[idx.s[i]]; + return c; +} + +template inline void v_lut_deinterleave(const float* tab, const v_reg& idx, + v_reg& x, v_reg& y) +{ + for( int i = 0; i < n; i++ ) + { + int j = idx.s[i]; + x.s[i] = tab[j]; + y.s[i] = tab[j+1]; + } +} + +template inline void v_lut_deinterleave(const double* tab, const v_reg& idx, + v_reg& x, v_reg& y) +{ + for( int i = 0; i < n; i++ ) + { + int j = idx.s[i]; + x.s[i] = tab[j]; + y.s[i] = tab[j+1]; + } +} + +template inline v_reg<_Tp, n> v_interleave_pairs(const v_reg<_Tp, n>& vec) +{ + v_reg<_Tp, n> c; + for (int i = 0; i < n/4; i++) + { + c.s[4*i ] = vec.s[4*i ]; + c.s[4*i+1] = vec.s[4*i+2]; + c.s[4*i+2] = vec.s[4*i+1]; + c.s[4*i+3] = vec.s[4*i+3]; + } + return c; +} + +template inline v_reg<_Tp, n> v_interleave_quads(const v_reg<_Tp, n>& vec) +{ + v_reg<_Tp, n> c; + for (int i = 0; i < n/8; i++) + { + c.s[8*i ] = vec.s[8*i ]; + c.s[8*i+1] = vec.s[8*i+4]; + c.s[8*i+2] = vec.s[8*i+1]; + c.s[8*i+3] = vec.s[8*i+5]; + c.s[8*i+4] = vec.s[8*i+2]; + c.s[8*i+5] = vec.s[8*i+6]; + c.s[8*i+6] = vec.s[8*i+3]; + c.s[8*i+7] = vec.s[8*i+7]; + } + return c; +} + +template inline v_reg<_Tp, n> v_pack_triplets(const v_reg<_Tp, n>& vec) +{ + v_reg<_Tp, n> c; + for (int i = 0; i < n/4; i++) + { + c.s[3*i ] = vec.s[4*i ]; + c.s[3*i+1] = vec.s[4*i+1]; + c.s[3*i+2] = vec.s[4*i+2]; + } + return c; +} + +template +inline void v_transpose4x4( v_reg<_Tp, 4>& a0, const v_reg<_Tp, 4>& a1, + const v_reg<_Tp, 4>& a2, const v_reg<_Tp, 4>& a3, + v_reg<_Tp, 4>& b0, v_reg<_Tp, 4>& b1, + v_reg<_Tp, 4>& b2, v_reg<_Tp, 4>& b3 ) +{ + b0 = v_reg<_Tp, 4>(a0.s[0], a1.s[0], a2.s[0], a3.s[0]); + b1 = v_reg<_Tp, 4>(a0.s[1], a1.s[1], a2.s[1], a3.s[1]); + b2 = v_reg<_Tp, 4>(a0.s[2], a1.s[2], a2.s[2], a3.s[2]); + b3 = v_reg<_Tp, 4>(a0.s[3], a1.s[3], a2.s[3], a3.s[3]); +} + +#define OPENCV_HAL_IMPL_C_INIT_ZERO(_Tpvec, _Tp, suffix) \ +inline _Tpvec v_setzero_##suffix() { return _Tpvec::zero(); } + +OPENCV_HAL_IMPL_C_INIT_ZERO(v_uint8x16, uchar, u8) +OPENCV_HAL_IMPL_C_INIT_ZERO(v_int8x16, schar, s8) +OPENCV_HAL_IMPL_C_INIT_ZERO(v_uint16x8, ushort, u16) +OPENCV_HAL_IMPL_C_INIT_ZERO(v_int16x8, short, s16) +OPENCV_HAL_IMPL_C_INIT_ZERO(v_uint32x4, unsigned, u32) +OPENCV_HAL_IMPL_C_INIT_ZERO(v_int32x4, int, s32) +OPENCV_HAL_IMPL_C_INIT_ZERO(v_float32x4, float, f32) +OPENCV_HAL_IMPL_C_INIT_ZERO(v_float64x2, double, f64) +OPENCV_HAL_IMPL_C_INIT_ZERO(v_uint64x2, uint64, u64) +OPENCV_HAL_IMPL_C_INIT_ZERO(v_int64x2, int64, s64) + +#define OPENCV_HAL_IMPL_C_INIT_VAL(_Tpvec, _Tp, suffix) \ +inline _Tpvec v_setall_##suffix(_Tp val) { return _Tpvec::all(val); } + +OPENCV_HAL_IMPL_C_INIT_VAL(v_int8x16, schar, s8) +OPENCV_HAL_IMPL_C_INIT_VAL(v_uint16x8, ushort, u16) +OPENCV_HAL_IMPL_C_INIT_VAL(v_int16x8, short, s16) +OPENCV_HAL_IMPL_C_INIT_VAL(v_uint32x4, unsigned, u32) +OPENCV_HAL_IMPL_C_INIT_VAL(v_int32x4, int, s32) +OPENCV_HAL_IMPL_C_INIT_VAL(v_float32x4, float, f32) +OPENCV_HAL_IMPL_C_INIT_VAL(v_float64x2, double, f64) +OPENCV_HAL_IMPL_C_INIT_VAL(v_uint64x2, uint64, u64) +OPENCV_HAL_IMPL_C_INIT_VAL(v_int64x2, int64, s64) + +#define OPENCV_HAL_IMPL_C_REINTERPRET(_Tpvec, _Tp, suffix) \ +template inline _Tpvec \ + v_reinterpret_as_##suffix(const v_reg<_Tp0, n0>& a) \ +{ return a.template reinterpret_as<_Tp, _Tpvec::nlanes>(); } + +OPENCV_HAL_IMPL_C_REINTERPRET(v_uint8x16, uchar, u8) +OPENCV_HAL_IMPL_C_REINTERPRET(v_int8x16, schar, s8) +OPENCV_HAL_IMPL_C_REINTERPRET(v_uint16x8, ushort, u16) +OPENCV_HAL_IMPL_C_REINTERPRET(v_int16x8, short, s16) +OPENCV_HAL_IMPL_C_REINTERPRET(v_uint32x4, unsigned, u32) +OPENCV_HAL_IMPL_C_REINTERPRET(v_int32x4, int, s32) +OPENCV_HAL_IMPL_C_REINTERPRET(v_float32x4, float, f32) +OPENCV_HAL_IMPL_C_REINTERPRET(v_float64x2, double, f64) +OPENCV_HAL_IMPL_C_REINTERPRET(v_uint64x2, uint64, u64) +OPENCV_HAL_IMPL_C_REINTERPRET(v_int64x2, int64, s64) + +#define OPENCV_HAL_IMPL_C_SHIFTL(_Tpvec, _Tp) \ +template inline _Tpvec v_shl(const _Tpvec& a) \ +{ return a << n; } + +OPENCV_HAL_IMPL_C_SHIFTL(v_uint16x8, ushort) +OPENCV_HAL_IMPL_C_SHIFTL(v_int16x8, short) +OPENCV_HAL_IMPL_C_SHIFTL(v_uint32x4, unsigned) +OPENCV_HAL_IMPL_C_SHIFTL(v_int32x4, int) +OPENCV_HAL_IMPL_C_SHIFTL(v_uint64x2, uint64) +OPENCV_HAL_IMPL_C_SHIFTL(v_int64x2, int64) + +#define OPENCV_HAL_IMPL_C_SHIFTR(_Tpvec, _Tp) \ +template inline _Tpvec v_shr(const _Tpvec& a) \ +{ return a >> n; } + +OPENCV_HAL_IMPL_C_SHIFTR(v_uint16x8, ushort) +OPENCV_HAL_IMPL_C_SHIFTR(v_int16x8, short) +OPENCV_HAL_IMPL_C_SHIFTR(v_uint32x4, unsigned) +OPENCV_HAL_IMPL_C_SHIFTR(v_int32x4, int) +OPENCV_HAL_IMPL_C_SHIFTR(v_uint64x2, uint64) +OPENCV_HAL_IMPL_C_SHIFTR(v_int64x2, int64) + +#define OPENCV_HAL_IMPL_C_RSHIFTR(_Tpvec, _Tp) \ +template inline _Tpvec v_rshr(const _Tpvec& a) \ +{ \ + _Tpvec c; \ + for( int i = 0; i < _Tpvec::nlanes; i++ ) \ + c.s[i] = (_Tp)((a.s[i] + ((_Tp)1 << (n - 1))) >> n); \ + return c; \ +} + +OPENCV_HAL_IMPL_C_RSHIFTR(v_uint16x8, ushort) +OPENCV_HAL_IMPL_C_RSHIFTR(v_int16x8, short) +OPENCV_HAL_IMPL_C_RSHIFTR(v_uint32x4, unsigned) +OPENCV_HAL_IMPL_C_RSHIFTR(v_int32x4, int) +OPENCV_HAL_IMPL_C_RSHIFTR(v_uint64x2, uint64) +OPENCV_HAL_IMPL_C_RSHIFTR(v_int64x2, int64) + +#define OPENCV_HAL_IMPL_C_PACK(_Tpvec, _Tpnvec, _Tpn, pack_suffix, cast) \ +inline _Tpnvec v_##pack_suffix(const _Tpvec& a, const _Tpvec& b) \ +{ \ + _Tpnvec c; \ + for( int i = 0; i < _Tpvec::nlanes; i++ ) \ + { \ + c.s[i] = cast<_Tpn>(a.s[i]); \ + c.s[i+_Tpvec::nlanes] = cast<_Tpn>(b.s[i]); \ + } \ + return c; \ +} + +OPENCV_HAL_IMPL_C_PACK(v_uint16x8, v_uint8x16, uchar, pack, saturate_cast) +OPENCV_HAL_IMPL_C_PACK(v_int16x8, v_int8x16, schar, pack, saturate_cast) +OPENCV_HAL_IMPL_C_PACK(v_uint32x4, v_uint16x8, ushort, pack, saturate_cast) +OPENCV_HAL_IMPL_C_PACK(v_int32x4, v_int16x8, short, pack, saturate_cast) +OPENCV_HAL_IMPL_C_PACK(v_uint64x2, v_uint32x4, unsigned, pack, static_cast) +OPENCV_HAL_IMPL_C_PACK(v_int64x2, v_int32x4, int, pack, static_cast) +OPENCV_HAL_IMPL_C_PACK(v_int16x8, v_uint8x16, uchar, pack_u, saturate_cast) +OPENCV_HAL_IMPL_C_PACK(v_int32x4, v_uint16x8, ushort, pack_u, saturate_cast) + +#define OPENCV_HAL_IMPL_C_RSHR_PACK(_Tpvec, _Tp, _Tpnvec, _Tpn, pack_suffix, cast) \ +template inline _Tpnvec v_rshr_##pack_suffix(const _Tpvec& a, const _Tpvec& b) \ +{ \ + _Tpnvec c; \ + for( int i = 0; i < _Tpvec::nlanes; i++ ) \ + { \ + c.s[i] = cast<_Tpn>((a.s[i] + ((_Tp)1 << (n - 1))) >> n); \ + c.s[i+_Tpvec::nlanes] = cast<_Tpn>((b.s[i] + ((_Tp)1 << (n - 1))) >> n); \ + } \ + return c; \ +} + +OPENCV_HAL_IMPL_C_RSHR_PACK(v_uint16x8, ushort, v_uint8x16, uchar, pack, saturate_cast) +OPENCV_HAL_IMPL_C_RSHR_PACK(v_int16x8, short, v_int8x16, schar, pack, saturate_cast) +OPENCV_HAL_IMPL_C_RSHR_PACK(v_uint32x4, unsigned, v_uint16x8, ushort, pack, saturate_cast) +OPENCV_HAL_IMPL_C_RSHR_PACK(v_int32x4, int, v_int16x8, short, pack, saturate_cast) +OPENCV_HAL_IMPL_C_RSHR_PACK(v_uint64x2, uint64, v_uint32x4, unsigned, pack, static_cast) +OPENCV_HAL_IMPL_C_RSHR_PACK(v_int64x2, int64, v_int32x4, int, pack, static_cast) +OPENCV_HAL_IMPL_C_RSHR_PACK(v_int16x8, short, v_uint8x16, uchar, pack_u, saturate_cast) +OPENCV_HAL_IMPL_C_RSHR_PACK(v_int32x4, int, v_uint16x8, ushort, pack_u, saturate_cast) + +#define OPENCV_HAL_IMPL_C_PACK_STORE(_Tpvec, _Tp, _Tpnvec, _Tpn, pack_suffix, cast) \ +inline void v_##pack_suffix##_store(_Tpn* ptr, const _Tpvec& a) \ +{ \ + for( int i = 0; i < _Tpvec::nlanes; i++ ) \ + ptr[i] = cast<_Tpn>(a.s[i]); \ +} + +OPENCV_HAL_IMPL_C_PACK_STORE(v_uint16x8, ushort, v_uint8x16, uchar, pack, saturate_cast) +OPENCV_HAL_IMPL_C_PACK_STORE(v_int16x8, short, v_int8x16, schar, pack, saturate_cast) +OPENCV_HAL_IMPL_C_PACK_STORE(v_uint32x4, unsigned, v_uint16x8, ushort, pack, saturate_cast) +OPENCV_HAL_IMPL_C_PACK_STORE(v_int32x4, int, v_int16x8, short, pack, saturate_cast) +OPENCV_HAL_IMPL_C_PACK_STORE(v_uint64x2, uint64, v_uint32x4, unsigned, pack, static_cast) +OPENCV_HAL_IMPL_C_PACK_STORE(v_int64x2, int64, v_int32x4, int, pack, static_cast) +OPENCV_HAL_IMPL_C_PACK_STORE(v_int16x8, short, v_uint8x16, uchar, pack_u, saturate_cast) +OPENCV_HAL_IMPL_C_PACK_STORE(v_int32x4, int, v_uint16x8, ushort, pack_u, saturate_cast) + +#define OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(_Tpvec, _Tp, _Tpnvec, _Tpn, pack_suffix, cast) \ +template inline void v_rshr_##pack_suffix##_store(_Tpn* ptr, const _Tpvec& a) \ +{ \ + for( int i = 0; i < _Tpvec::nlanes; i++ ) \ + ptr[i] = cast<_Tpn>((a.s[i] + ((_Tp)1 << (n - 1))) >> n); \ +} + +OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_uint16x8, ushort, v_uint8x16, uchar, pack, saturate_cast) +OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_int16x8, short, v_int8x16, schar, pack, saturate_cast) +OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_uint32x4, unsigned, v_uint16x8, ushort, pack, saturate_cast) +OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_int32x4, int, v_int16x8, short, pack, saturate_cast) +OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_uint64x2, uint64, v_uint32x4, unsigned, pack, static_cast) +OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_int64x2, int64, v_int32x4, int, pack, static_cast) +OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_int16x8, short, v_uint8x16, uchar, pack_u, saturate_cast) +OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_int32x4, int, v_uint16x8, ushort, pack_u, saturate_cast) + +template +inline void _pack_b(_Tpm* mptr, const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) +{ + for (int i = 0; i < n; ++i) + { + mptr[i] = (_Tpm)a.s[i]; + mptr[i + n] = (_Tpm)b.s[i]; + } +} + +inline v_uint8x16 v_pack_b(const v_uint16x8& a, const v_uint16x8& b) +{ + v_uint8x16 mask; + _pack_b(mask.s, a, b); + return mask; +} + +inline v_uint8x16 v_pack_b(const v_uint32x4& a, const v_uint32x4& b, + const v_uint32x4& c, const v_uint32x4& d) +{ + v_uint8x16 mask; + _pack_b(mask.s, a, b); + _pack_b(mask.s + 8, c, d); + return mask; +} + +inline v_uint8x16 v_pack_b(const v_uint64x2& a, const v_uint64x2& b, const v_uint64x2& c, + const v_uint64x2& d, const v_uint64x2& e, const v_uint64x2& f, + const v_uint64x2& g, const v_uint64x2& h) +{ + v_uint8x16 mask; + _pack_b(mask.s, a, b); + _pack_b(mask.s + 4, c, d); + _pack_b(mask.s + 8, e, f); + _pack_b(mask.s + 12, g, h); + return mask; +} + +inline v_float32x4 v_matmul(const v_float32x4& v, const v_float32x4& m0, + const v_float32x4& m1, const v_float32x4& m2, + const v_float32x4& m3) +{ + return v_float32x4(v.s[0]*m0.s[0] + v.s[1]*m1.s[0] + v.s[2]*m2.s[0] + v.s[3]*m3.s[0], + v.s[0]*m0.s[1] + v.s[1]*m1.s[1] + v.s[2]*m2.s[1] + v.s[3]*m3.s[1], + v.s[0]*m0.s[2] + v.s[1]*m1.s[2] + v.s[2]*m2.s[2] + v.s[3]*m3.s[2], + v.s[0]*m0.s[3] + v.s[1]*m1.s[3] + v.s[2]*m2.s[3] + v.s[3]*m3.s[3]); +} + +inline v_float32x4 v_matmuladd(const v_float32x4& v, const v_float32x4& m0, + const v_float32x4& m1, const v_float32x4& m2, + const v_float32x4& m3) +{ + return v_float32x4(v.s[0]*m0.s[0] + v.s[1]*m1.s[0] + v.s[2]*m2.s[0] + m3.s[0], + v.s[0]*m0.s[1] + v.s[1]*m1.s[1] + v.s[2]*m2.s[1] + m3.s[1], + v.s[0]*m0.s[2] + v.s[1]*m1.s[2] + v.s[2]*m2.s[2] + m3.s[2], + v.s[0]*m0.s[3] + v.s[1]*m1.s[3] + v.s[2]*m2.s[3] + m3.s[3]); +} + +inline v_reg::nlanes128> +v_load_expand(const float16_t* ptr) +{ + v_reg::nlanes128> v; + for( int i = 0; i < v.nlanes; i++ ) + { + v.s[i] = ptr[i]; + } + return v; +} + +inline void +v_pack_store(float16_t* ptr, const v_reg::nlanes128>& v) +{ + for( int i = 0; i < v.nlanes; i++ ) + { + ptr[i] = float16_t(v.s[i]); + } +} + +inline void v_cleanup() {} +} // namespace fallback + +static v128_t wasm_unpacklo_i8x16(v128_t a, v128_t b) { + return wasm_v8x16_shuffle(a, b, 0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23); +} + +static v128_t wasm_unpacklo_i16x8(v128_t a, v128_t b) { + return wasm_v8x16_shuffle(a, b, 0,1,16,17,2,3,18,19,4,5,20,21,6,7,22,23); +} + +static v128_t wasm_unpacklo_i32x4(v128_t a, v128_t b) { + return wasm_v8x16_shuffle(a, b, 0,1,2,3,16,17,18,19,4,5,6,7,20,21,22,23); +} + +static v128_t wasm_unpacklo_i64x2(v128_t a, v128_t b) { + return wasm_v8x16_shuffle(a, b, 0,1,2,3,4,5,6,7,16,17,18,19,20,21,22,23); +} + +static v128_t wasm_unpackhi_i8x16(v128_t a, v128_t b) { + return wasm_v8x16_shuffle(a, b, 8,24,9,25,10,26,11,27,12,28,13,29,14,30,15,31); +} + +static v128_t wasm_unpackhi_i16x8(v128_t a, v128_t b) { + return wasm_v8x16_shuffle(a, b, 8,9,24,25,10,11,26,27,12,13,28,29,14,15,30,31); +} + +static v128_t wasm_unpackhi_i32x4(v128_t a, v128_t b) { + return wasm_v8x16_shuffle(a, b, 8,9,10,11,24,25,26,27,12,13,14,15,28,29,30,31); +} + +static v128_t wasm_unpackhi_i64x2(v128_t a, v128_t b) { + return wasm_v8x16_shuffle(a, b, 8,9,10,11,12,13,14,15,24,25,26,27,28,29,30,31); +} + +/** Convert **/ +// 8 >> 16 +inline v128_t v128_cvtu8x16_i16x8(const v128_t& a) +{ + const v128_t z = wasm_i8x16_splat(0); + return wasm_unpacklo_i8x16(a, z); +} +inline v128_t v128_cvti8x16_i16x8(const v128_t& a) +{ return wasm_i16x8_shr(wasm_unpacklo_i8x16(a, a), 8); } +// 8 >> 32 +inline v128_t v128_cvtu8x16_i32x4(const v128_t& a) +{ + const v128_t z = wasm_i8x16_splat(0); + return wasm_unpacklo_i16x8(wasm_unpacklo_i8x16(a, z), z); +} +inline v128_t v128_cvti8x16_i32x4(const v128_t& a) +{ + v128_t r = wasm_unpacklo_i8x16(a, a); + r = wasm_unpacklo_i8x16(r, r); + return wasm_i32x4_shr(r, 24); +} +// 16 >> 32 +inline v128_t v128_cvtu16x8_i32x4(const v128_t& a) +{ + const v128_t z = wasm_i8x16_splat(0); + return wasm_unpacklo_i16x8(a, z); +} +inline v128_t v128_cvti16x8_i32x4(const v128_t& a) +{ return wasm_i32x4_shr(wasm_unpacklo_i16x8(a, a), 16); } +// 32 >> 64 +inline v128_t v128_cvtu32x4_i64x2(const v128_t& a) +{ + const v128_t z = wasm_i8x16_splat(0); + return wasm_unpacklo_i32x4(a, z); +} +inline v128_t v128_cvti32x4_i64x2(const v128_t& a) +{ return wasm_unpacklo_i32x4(a, wasm_i32x4_shr(a, 31)); } + +// 16 << 8 +inline v128_t v128_cvtu8x16_i16x8_high(const v128_t& a) +{ + const v128_t z = wasm_i8x16_splat(0); + return wasm_unpackhi_i8x16(a, z); +} +inline v128_t v128_cvti8x16_i16x8_high(const v128_t& a) +{ return wasm_i16x8_shr(wasm_unpackhi_i8x16(a, a), 8); } +// 32 << 16 +inline v128_t v128_cvtu16x8_i32x4_high(const v128_t& a) +{ + const v128_t z = wasm_i8x16_splat(0); + return wasm_unpackhi_i16x8(a, z); +} +inline v128_t v128_cvti16x8_i32x4_high(const v128_t& a) +{ return wasm_i32x4_shr(wasm_unpackhi_i16x8(a, a), 16); } +// 64 << 32 +inline v128_t v128_cvtu32x4_i64x2_high(const v128_t& a) +{ + const v128_t z = wasm_i8x16_splat(0); + return wasm_unpackhi_i32x4(a, z); +} +inline v128_t v128_cvti32x4_i64x2_high(const v128_t& a) +{ return wasm_unpackhi_i32x4(a, wasm_i32x4_shr(a, 31)); } + +#define OPENCV_HAL_IMPL_WASM_INITVEC(_Tpvec, _Tp, suffix, zsuffix, _Tps) \ +inline _Tpvec v_setzero_##suffix() { return _Tpvec(wasm_##zsuffix##_splat((_Tps)0)); } \ +inline _Tpvec v_setall_##suffix(_Tp v) { return _Tpvec(wasm_##zsuffix##_splat((_Tps)v)); } \ +template inline _Tpvec v_reinterpret_as_##suffix(const _Tpvec0& a) \ +{ return _Tpvec(a.val); } + +OPENCV_HAL_IMPL_WASM_INITVEC(v_uint8x16, uchar, u8, i8x16, schar) +OPENCV_HAL_IMPL_WASM_INITVEC(v_int8x16, schar, s8, i8x16, schar) +OPENCV_HAL_IMPL_WASM_INITVEC(v_uint16x8, ushort, u16, i16x8, short) +OPENCV_HAL_IMPL_WASM_INITVEC(v_int16x8, short, s16, i16x8, short) +OPENCV_HAL_IMPL_WASM_INITVEC(v_uint32x4, unsigned, u32, i32x4, int) +OPENCV_HAL_IMPL_WASM_INITVEC(v_int32x4, int, s32, i32x4, int) +OPENCV_HAL_IMPL_WASM_INITVEC(v_float32x4, float, f32, f32x4, float) +OPENCV_HAL_IMPL_WASM_INITVEC(v_uint64x2, uint64, u64, i64x2, int64) +OPENCV_HAL_IMPL_WASM_INITVEC(v_int64x2, int64, s64, i64x2, int64) +OPENCV_HAL_IMPL_WASM_INITVEC(v_float64x2, double, f64, f64x2, double) + +//////////////// PACK /////////////// +inline v_uint8x16 v_pack(const v_uint16x8& a, const v_uint16x8& b) +{ + v128_t maxval = wasm_i16x8_splat(255); + v128_t a1 = wasm_v128_bitselect(maxval, a.val, wasm_u16x8_gt(a.val, maxval)); + v128_t b1 = wasm_v128_bitselect(maxval, b.val, wasm_u16x8_gt(b.val, maxval)); + return v_uint8x16(wasm_v8x16_shuffle(a1, b1, 0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30)); +} +inline v_int8x16 v_pack(const v_int16x8& a, const v_int16x8& b) +{ + v128_t maxval = wasm_i16x8_splat(127); + v128_t minval = wasm_i16x8_splat(-128); + v128_t a1 = wasm_v128_bitselect(maxval, a.val, wasm_i16x8_gt(a.val, maxval)); + v128_t b1 = wasm_v128_bitselect(maxval, b.val, wasm_i16x8_gt(b.val, maxval)); + v128_t a2 = wasm_v128_bitselect(minval, a1, wasm_i16x8_lt(a1, minval)); + v128_t b2 = wasm_v128_bitselect(minval, b1, wasm_i16x8_lt(b1, minval)); + return v_int8x16(wasm_v8x16_shuffle(a2, b2, 0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30)); +} +inline v_uint16x8 v_pack(const v_uint32x4& a, const v_uint32x4& b) +{ + v128_t maxval = wasm_i32x4_splat(65535); + v128_t a1 = wasm_v128_bitselect(maxval, a.val, wasm_u32x4_gt(a.val, maxval)); + v128_t b1 = wasm_v128_bitselect(maxval, b.val, wasm_u32x4_gt(b.val, maxval)); + return v_uint16x8(wasm_v8x16_shuffle(a1, b1, 0,1,4,5,8,9,12,13,16,17,20,21,24,25,28,29)); +} +inline v_int16x8 v_pack(const v_int32x4& a, const v_int32x4& b) +{ + v128_t maxval = wasm_i32x4_splat(32767); + v128_t minval = wasm_i32x4_splat(-32768); + v128_t a1 = wasm_v128_bitselect(maxval, a.val, wasm_i32x4_gt(a.val, maxval)); + v128_t b1 = wasm_v128_bitselect(maxval, b.val, wasm_i32x4_gt(b.val, maxval)); + v128_t a2 = wasm_v128_bitselect(minval, a1, wasm_i32x4_lt(a1, minval)); + v128_t b2 = wasm_v128_bitselect(minval, b1, wasm_i32x4_lt(b1, minval)); + return v_int16x8(wasm_v8x16_shuffle(a2, b2, 0,1,4,5,8,9,12,13,16,17,20,21,24,25,28,29)); +} +inline v_uint32x4 v_pack(const v_uint64x2& a, const v_uint64x2& b) +{ + return v_uint32x4(wasm_v8x16_shuffle(a.val, b.val, 0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27)); +} +inline v_int32x4 v_pack(const v_int64x2& a, const v_int64x2& b) +{ + return v_int32x4(wasm_v8x16_shuffle(a.val, b.val, 0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27)); +} +inline v_uint8x16 v_pack_u(const v_int16x8& a, const v_int16x8& b) +{ + v128_t maxval = wasm_i16x8_splat(255); + v128_t minval = wasm_i16x8_splat(0); + v128_t a1 = wasm_v128_bitselect(maxval, a.val, wasm_i16x8_gt(a.val, maxval)); + v128_t b1 = wasm_v128_bitselect(maxval, b.val, wasm_i16x8_gt(b.val, maxval)); + v128_t a2 = wasm_v128_bitselect(minval, a1, wasm_i16x8_lt(a1, minval)); + v128_t b2 = wasm_v128_bitselect(minval, b1, wasm_i16x8_lt(b1, minval)); + return v_uint8x16(wasm_v8x16_shuffle(a2, b2, 0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30)); +} +inline v_uint16x8 v_pack_u(const v_int32x4& a, const v_int32x4& b) +{ + v128_t maxval = wasm_i32x4_splat(65535); + v128_t minval = wasm_i32x4_splat(0); + v128_t a1 = wasm_v128_bitselect(maxval, a.val, wasm_i32x4_gt(a.val, maxval)); + v128_t b1 = wasm_v128_bitselect(maxval, b.val, wasm_i32x4_gt(b.val, maxval)); + v128_t a2 = wasm_v128_bitselect(minval, a1, wasm_i32x4_lt(a1, minval)); + v128_t b2 = wasm_v128_bitselect(minval, b1, wasm_i32x4_lt(b1, minval)); + return v_uint16x8(wasm_v8x16_shuffle(a2, b2, 0,1,4,5,8,9,12,13,16,17,20,21,24,25,28,29)); +} + +template +inline v_uint8x16 v_rshr_pack(const v_uint16x8& a, const v_uint16x8& b) +{ + v128_t delta = wasm_i16x8_splat(((short)1 << (n-1))); + v128_t a1 = wasm_u16x8_shr(wasm_i16x8_add(a.val, delta), n); + v128_t b1 = wasm_u16x8_shr(wasm_i16x8_add(b.val, delta), n); + v128_t maxval = wasm_i16x8_splat(255); + v128_t a2 = wasm_v128_bitselect(maxval, a1, wasm_u16x8_gt(a1, maxval)); + v128_t b2 = wasm_v128_bitselect(maxval, b1, wasm_u16x8_gt(b1, maxval)); + return v_uint8x16(wasm_v8x16_shuffle(a2, b2, 0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30)); +} +template +inline v_int8x16 v_rshr_pack(const v_int16x8& a, const v_int16x8& b) +{ + v128_t delta = wasm_i16x8_splat(((short)1 << (n-1))); + v128_t a1 = wasm_i16x8_shr(wasm_i16x8_add(a.val, delta), n); + v128_t b1 = wasm_i16x8_shr(wasm_i16x8_add(b.val, delta), n); + v128_t maxval = wasm_i16x8_splat(127); + v128_t minval = wasm_i16x8_splat(-128); + v128_t a2 = wasm_v128_bitselect(maxval, a1, wasm_i16x8_gt(a1, maxval)); + v128_t b2 = wasm_v128_bitselect(maxval, b1, wasm_i16x8_gt(b1, maxval)); + v128_t a3 = wasm_v128_bitselect(minval, a2, wasm_i16x8_lt(a1, minval)); + v128_t b3 = wasm_v128_bitselect(minval, b2, wasm_i16x8_lt(b1, minval)); + return v_int8x16(wasm_v8x16_shuffle(a3, b3, 0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30)); +} +template +inline v_uint16x8 v_rshr_pack(const v_uint32x4& a, const v_uint32x4& b) +{ + v128_t delta = wasm_i32x4_splat(((int)1 << (n-1))); + v128_t a1 = wasm_u32x4_shr(wasm_i32x4_add(a.val, delta), n); + v128_t b1 = wasm_u32x4_shr(wasm_i32x4_add(b.val, delta), n); + v128_t maxval = wasm_i32x4_splat(65535); + v128_t a2 = wasm_v128_bitselect(maxval, a1, wasm_u32x4_gt(a1, maxval)); + v128_t b2 = wasm_v128_bitselect(maxval, b1, wasm_u32x4_gt(b1, maxval)); + return v_uint16x8(wasm_v8x16_shuffle(a2, b2, 0,1,4,5,8,9,12,13,16,17,20,21,24,25,28,29)); +} +template +inline v_int16x8 v_rshr_pack(const v_int32x4& a, const v_int32x4& b) +{ + v128_t delta = wasm_i32x4_splat(((int)1 << (n-1))); + v128_t a1 = wasm_i32x4_shr(wasm_i32x4_add(a.val, delta), n); + v128_t b1 = wasm_i32x4_shr(wasm_i32x4_add(b.val, delta), n); + v128_t maxval = wasm_i32x4_splat(32767); + v128_t minval = wasm_i16x8_splat(-32768); + v128_t a2 = wasm_v128_bitselect(maxval, a1, wasm_i32x4_gt(a1, maxval)); + v128_t b2 = wasm_v128_bitselect(maxval, b1, wasm_i32x4_gt(b1, maxval)); + v128_t a3 = wasm_v128_bitselect(minval, a2, wasm_i32x4_lt(a1, minval)); + v128_t b3 = wasm_v128_bitselect(minval, b2, wasm_i32x4_lt(b1, minval)); + return v_int16x8(wasm_v8x16_shuffle(a3, b3, 0,1,4,5,8,9,12,13,16,17,20,21,24,25,28,29)); +} +template +inline v_uint32x4 v_rshr_pack(const v_uint64x2& a, const v_uint64x2& b) +{ + v128_t delta = wasm_i64x2_splat(((int64)1 << (n-1))); + v128_t a1 = wasm_u64x2_shr(wasm_i64x2_add(a.val, delta), n); + v128_t b1 = wasm_u64x2_shr(wasm_i64x2_add(b.val, delta), n); + return v_uint32x4(wasm_v8x16_shuffle(a1, b1, 0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27)); +} +template +inline v_int32x4 v_rshr_pack(const v_int64x2& a, const v_int64x2& b) +{ + v128_t delta = wasm_i64x2_splat(((int64)1 << (n-1))); + v128_t a1 = wasm_i64x2_shr(wasm_i64x2_add(a.val, delta), n); + v128_t b1 = wasm_i64x2_shr(wasm_i64x2_add(b.val, delta), n); + return v_int32x4(wasm_v8x16_shuffle(a1, b1, 0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27)); +} +template +inline v_uint8x16 v_rshr_pack_u(const v_int16x8& a, const v_int16x8& b) +{ + v128_t delta = wasm_i16x8_splat(((short)1 << (n-1))); + v128_t a1 = wasm_i16x8_shr(wasm_i16x8_add(a.val, delta), n); + v128_t b1 = wasm_i16x8_shr(wasm_i16x8_add(b.val, delta), n); + v128_t maxval = wasm_i16x8_splat(255); + v128_t minval = wasm_i16x8_splat(0); + v128_t a2 = wasm_v128_bitselect(maxval, a1, wasm_i16x8_gt(a1, maxval)); + v128_t b2 = wasm_v128_bitselect(maxval, b1, wasm_i16x8_gt(b1, maxval)); + v128_t a3 = wasm_v128_bitselect(minval, a2, wasm_i16x8_lt(a1, minval)); + v128_t b3 = wasm_v128_bitselect(minval, b2, wasm_i16x8_lt(b1, minval)); + return v_uint8x16(wasm_v8x16_shuffle(a3, b3, 0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30)); +} +template +inline v_uint16x8 v_rshr_pack_u(const v_int32x4& a, const v_int32x4& b) +{ + v128_t delta = wasm_i32x4_splat(((int)1 << (n-1))); + v128_t a1 = wasm_i32x4_shr(wasm_i32x4_add(a.val, delta), n); + v128_t b1 = wasm_i32x4_shr(wasm_i32x4_add(b.val, delta), n); + v128_t maxval = wasm_i32x4_splat(65535); + v128_t minval = wasm_i16x8_splat(0); + v128_t a2 = wasm_v128_bitselect(maxval, a1, wasm_i32x4_gt(a1, maxval)); + v128_t b2 = wasm_v128_bitselect(maxval, b1, wasm_i32x4_gt(b1, maxval)); + v128_t a3 = wasm_v128_bitselect(minval, a2, wasm_i32x4_lt(a1, minval)); + v128_t b3 = wasm_v128_bitselect(minval, b2, wasm_i32x4_lt(b1, minval)); + return v_uint16x8(wasm_v8x16_shuffle(a3, b3, 0,1,4,5,8,9,12,13,16,17,20,21,24,25,28,29)); +} + +inline void v_pack_store(uchar* ptr, const v_uint16x8& a) +{ + v128_t maxval = wasm_i16x8_splat(255); + v128_t a1 = wasm_v128_bitselect(maxval, a.val, wasm_u16x8_gt(a.val, maxval)); + v128_t r = wasm_v8x16_shuffle(a1, a1, 0,2,4,6,8,10,12,14,0,2,4,6,8,10,12,14); + uchar t_ptr[16]; + wasm_v128_store(t_ptr, r); + for (int i=0; i<8; ++i) { + ptr[i] = t_ptr[i]; + } +} +inline void v_pack_store(schar* ptr, const v_int16x8& a) +{ + v128_t maxval = wasm_i16x8_splat(127); + v128_t minval = wasm_i16x8_splat(-128); + v128_t a1 = wasm_v128_bitselect(maxval, a.val, wasm_i16x8_gt(a.val, maxval)); + v128_t a2 = wasm_v128_bitselect(minval, a1, wasm_i16x8_lt(a1, minval)); + v128_t r = wasm_v8x16_shuffle(a2, a2, 0,2,4,6,8,10,12,14,0,2,4,6,8,10,12,14); + schar t_ptr[16]; + wasm_v128_store(t_ptr, r); + for (int i=0; i<8; ++i) { + ptr[i] = t_ptr[i]; + } +} +inline void v_pack_store(ushort* ptr, const v_uint32x4& a) +{ + v128_t maxval = wasm_i32x4_splat(65535); + v128_t a1 = wasm_v128_bitselect(maxval, a.val, wasm_u32x4_gt(a.val, maxval)); + v128_t r = wasm_v8x16_shuffle(a1, a1, 0,1,4,5,8,9,12,13,0,1,4,5,8,9,12,13); + ushort t_ptr[8]; + wasm_v128_store(t_ptr, r); + for (int i=0; i<4; ++i) { + ptr[i] = t_ptr[i]; + } +} +inline void v_pack_store(short* ptr, const v_int32x4& a) +{ + v128_t maxval = wasm_i32x4_splat(32767); + v128_t minval = wasm_i32x4_splat(-32768); + v128_t a1 = wasm_v128_bitselect(maxval, a.val, wasm_i32x4_gt(a.val, maxval)); + v128_t a2 = wasm_v128_bitselect(minval, a1, wasm_i32x4_lt(a1, minval)); + v128_t r = wasm_v8x16_shuffle(a2, a2, 0,1,4,5,8,9,12,13,0,1,4,5,8,9,12,13); + short t_ptr[8]; + wasm_v128_store(t_ptr, r); + for (int i=0; i<4; ++i) { + ptr[i] = t_ptr[i]; + } +} +inline void v_pack_store(unsigned* ptr, const v_uint64x2& a) +{ + v128_t r = wasm_v8x16_shuffle(a.val, a.val, 0,1,2,3,8,9,10,11,0,1,2,3,8,9,10,11); + unsigned t_ptr[4]; + wasm_v128_store(t_ptr, r); + for (int i=0; i<2; ++i) { + ptr[i] = t_ptr[i]; + } +} +inline void v_pack_store(int* ptr, const v_int64x2& a) +{ + v128_t r = wasm_v8x16_shuffle(a.val, a.val, 0,1,2,3,8,9,10,11,0,1,2,3,8,9,10,11); + int t_ptr[4]; + wasm_v128_store(t_ptr, r); + for (int i=0; i<2; ++i) { + ptr[i] = t_ptr[i]; + } +} +inline void v_pack_u_store(uchar* ptr, const v_int16x8& a) +{ + v128_t maxval = wasm_i16x8_splat(255); + v128_t minval = wasm_i16x8_splat(0); + v128_t a1 = wasm_v128_bitselect(maxval, a.val, wasm_i16x8_gt(a.val, maxval)); + v128_t a2 = wasm_v128_bitselect(minval, a1, wasm_i16x8_lt(a1, minval)); + v128_t r = wasm_v8x16_shuffle(a2, a2, 0,2,4,6,8,10,12,14,0,2,4,6,8,10,12,14); + uchar t_ptr[16]; + wasm_v128_store(t_ptr, r); + for (int i=0; i<8; ++i) { + ptr[i] = t_ptr[i]; + } +} +inline void v_pack_u_store(ushort* ptr, const v_int32x4& a) +{ + v128_t maxval = wasm_i32x4_splat(65535); + v128_t minval = wasm_i32x4_splat(0); + v128_t a1 = wasm_v128_bitselect(maxval, a.val, wasm_i32x4_gt(a.val, maxval)); + v128_t a2 = wasm_v128_bitselect(minval, a1, wasm_i32x4_lt(a1, minval)); + v128_t r = wasm_v8x16_shuffle(a2, a2, 0,1,4,5,8,9,12,13,0,1,4,5,8,9,12,13); + ushort t_ptr[8]; + wasm_v128_store(t_ptr, r); + for (int i=0; i<4; ++i) { + ptr[i] = t_ptr[i]; + } +} + +template +inline void v_rshr_pack_store(uchar* ptr, const v_uint16x8& a) +{ + v128_t delta = wasm_i16x8_splat((short)(1 << (n-1))); + v128_t a1 = wasm_u16x8_shr(wasm_i16x8_add(a.val, delta), n); + v128_t maxval = wasm_i16x8_splat(255); + v128_t a2 = wasm_v128_bitselect(maxval, a1, wasm_u16x8_gt(a1, maxval)); + v128_t r = wasm_v8x16_shuffle(a2, a2, 0,2,4,6,8,10,12,14,0,2,4,6,8,10,12,14); + uchar t_ptr[16]; + wasm_v128_store(t_ptr, r); + for (int i=0; i<8; ++i) { + ptr[i] = t_ptr[i]; + } +} +template +inline void v_rshr_pack_store(schar* ptr, const v_int16x8& a) +{ + v128_t delta = wasm_i16x8_splat(((short)1 << (n-1))); + v128_t a1 = wasm_i16x8_shr(wasm_i16x8_add(a.val, delta), n); + v128_t maxval = wasm_i16x8_splat(127); + v128_t minval = wasm_i16x8_splat(-128); + v128_t a2 = wasm_v128_bitselect(maxval, a1, wasm_i16x8_gt(a1, maxval)); + v128_t a3 = wasm_v128_bitselect(minval, a2, wasm_i16x8_lt(a1, minval)); + v128_t r = wasm_v8x16_shuffle(a3, a3, 0,2,4,6,8,10,12,14,0,2,4,6,8,10,12,14); + schar t_ptr[16]; + wasm_v128_store(t_ptr, r); + for (int i=0; i<8; ++i) { + ptr[i] = t_ptr[i]; + } +} +template +inline void v_rshr_pack_store(ushort* ptr, const v_uint32x4& a) +{ + v128_t delta = wasm_i32x4_splat(((int)1 << (n-1))); + v128_t a1 = wasm_u32x4_shr(wasm_i32x4_add(a.val, delta), n); + v128_t maxval = wasm_i32x4_splat(65535); + v128_t a2 = wasm_v128_bitselect(maxval, a1, wasm_u32x4_gt(a1, maxval)); + v128_t r = wasm_v8x16_shuffle(a2, a2, 0,1,4,5,8,9,12,13,0,1,4,5,8,9,12,13); + ushort t_ptr[8]; + wasm_v128_store(t_ptr, r); + for (int i=0; i<4; ++i) { + ptr[i] = t_ptr[i]; + } +} +template +inline void v_rshr_pack_store(short* ptr, const v_int32x4& a) +{ + v128_t delta = wasm_i32x4_splat(((int)1 << (n-1))); + v128_t a1 = wasm_i32x4_shr(wasm_i32x4_add(a.val, delta), n); + v128_t maxval = wasm_i32x4_splat(32767); + v128_t minval = wasm_i32x4_splat(-32768); + v128_t a2 = wasm_v128_bitselect(maxval, a1, wasm_i32x4_gt(a1, maxval)); + v128_t a3 = wasm_v128_bitselect(minval, a2, wasm_i32x4_lt(a1, minval)); + v128_t r = wasm_v8x16_shuffle(a3, a3, 0,1,4,5,8,9,12,13,0,1,4,5,8,9,12,13); + short t_ptr[8]; + wasm_v128_store(t_ptr, r); + for (int i=0; i<4; ++i) { + ptr[i] = t_ptr[i]; + } +} +template +inline void v_rshr_pack_store(unsigned* ptr, const v_uint64x2& a) +{ + v128_t delta = wasm_i64x2_splat(((int64)1 << (n-1))); + v128_t a1 = wasm_u64x2_shr(wasm_i64x2_add(a.val, delta), n); + v128_t r = wasm_v8x16_shuffle(a1, a1, 0,1,2,3,8,9,10,11,0,1,2,3,8,9,10,11); + unsigned t_ptr[4]; + wasm_v128_store(t_ptr, r); + for (int i=0; i<2; ++i) { + ptr[i] = t_ptr[i]; + } +} +template +inline void v_rshr_pack_store(int* ptr, const v_int64x2& a) +{ + v128_t delta = wasm_i64x2_splat(((int64)1 << (n-1))); + v128_t a1 = wasm_i64x2_shr(wasm_i64x2_add(a.val, delta), n); + v128_t r = wasm_v8x16_shuffle(a1, a1, 0,1,2,3,8,9,10,11,0,1,2,3,8,9,10,11); + int t_ptr[4]; + wasm_v128_store(t_ptr, r); + for (int i=0; i<2; ++i) { + ptr[i] = t_ptr[i]; + } +} +template +inline void v_rshr_pack_u_store(uchar* ptr, const v_int16x8& a) +{ + v128_t delta = wasm_i16x8_splat(((short)1 << (n-1))); + v128_t a1 = wasm_i16x8_shr(wasm_i16x8_add(a.val, delta), n); + v128_t maxval = wasm_i16x8_splat(255); + v128_t minval = wasm_i16x8_splat(0); + v128_t a2 = wasm_v128_bitselect(maxval, a1, wasm_i16x8_gt(a1, maxval)); + v128_t a3 = wasm_v128_bitselect(minval, a2, wasm_i16x8_lt(a1, minval)); + v128_t r = wasm_v8x16_shuffle(a3, a3, 0,2,4,6,8,10,12,14,0,2,4,6,8,10,12,14); + uchar t_ptr[16]; + wasm_v128_store(t_ptr, r); + for (int i=0; i<8; ++i) { + ptr[i] = t_ptr[i]; + } +} +template +inline void v_rshr_pack_u_store(ushort* ptr, const v_int32x4& a) +{ + v128_t delta = wasm_i32x4_splat(((int)1 << (n-1))); + v128_t a1 = wasm_i32x4_shr(wasm_i32x4_add(a.val, delta), n); + v128_t maxval = wasm_i32x4_splat(65535); + v128_t minval = wasm_i32x4_splat(0); + v128_t a2 = wasm_v128_bitselect(maxval, a1, wasm_i32x4_gt(a1, maxval)); + v128_t a3 = wasm_v128_bitselect(minval, a2, wasm_i32x4_lt(a1, minval)); + v128_t r = wasm_v8x16_shuffle(a3, a3, 0,1,4,5,8,9,12,13,0,1,4,5,8,9,12,13); + ushort t_ptr[8]; + wasm_v128_store(t_ptr, r); + for (int i=0; i<4; ++i) { + ptr[i] = t_ptr[i]; + } +} + +inline v_uint8x16 v_pack_b(const v_uint16x8& a, const v_uint16x8& b) +{ + v128_t maxval = wasm_i16x8_splat(255); + v128_t a1 = wasm_v128_bitselect(maxval, a.val, wasm_u16x8_gt(a.val, maxval)); + v128_t b1 = wasm_v128_bitselect(maxval, b.val, wasm_u16x8_gt(b.val, maxval)); + return v_uint8x16(wasm_v8x16_shuffle(a1, b1, 0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30)); +} + +inline v_uint8x16 v_pack_b(const v_uint32x4& a, const v_uint32x4& b, + const v_uint32x4& c, const v_uint32x4& d) +{ + v128_t maxval = wasm_i32x4_splat(255); + v128_t a1 = wasm_v128_bitselect(maxval, a.val, wasm_u32x4_gt(a.val, maxval)); + v128_t b1 = wasm_v128_bitselect(maxval, b.val, wasm_u32x4_gt(b.val, maxval)); + v128_t c1 = wasm_v128_bitselect(maxval, c.val, wasm_u32x4_gt(c.val, maxval)); + v128_t d1 = wasm_v128_bitselect(maxval, d.val, wasm_u32x4_gt(d.val, maxval)); + v128_t ab = wasm_v8x16_shuffle(a1, b1, 0,4,8,12,16,20,24,28,0,4,8,12,16,20,24,28); + v128_t cd = wasm_v8x16_shuffle(c1, d1, 0,4,8,12,16,20,24,28,0,4,8,12,16,20,24,28); + return v_uint8x16(wasm_v8x16_shuffle(ab, cd, 0,1,2,3,4,5,6,7,16,17,18,19,20,21,22,23)); +} + +inline v_uint8x16 v_pack_b(const v_uint64x2& a, const v_uint64x2& b, const v_uint64x2& c, + const v_uint64x2& d, const v_uint64x2& e, const v_uint64x2& f, + const v_uint64x2& g, const v_uint64x2& h) +{ + v128_t maxval = wasm_i32x4_splat(255); + v128_t a1 = wasm_v128_bitselect(maxval, a.val, ((__u64x2)(a.val) > (__u64x2)maxval)); + v128_t b1 = wasm_v128_bitselect(maxval, b.val, ((__u64x2)(b.val) > (__u64x2)maxval)); + v128_t c1 = wasm_v128_bitselect(maxval, c.val, ((__u64x2)(c.val) > (__u64x2)maxval)); + v128_t d1 = wasm_v128_bitselect(maxval, d.val, ((__u64x2)(d.val) > (__u64x2)maxval)); + v128_t e1 = wasm_v128_bitselect(maxval, e.val, ((__u64x2)(e.val) > (__u64x2)maxval)); + v128_t f1 = wasm_v128_bitselect(maxval, f.val, ((__u64x2)(f.val) > (__u64x2)maxval)); + v128_t g1 = wasm_v128_bitselect(maxval, g.val, ((__u64x2)(g.val) > (__u64x2)maxval)); + v128_t h1 = wasm_v128_bitselect(maxval, h.val, ((__u64x2)(h.val) > (__u64x2)maxval)); + v128_t ab = wasm_v8x16_shuffle(a1, b1, 0,8,16,24,0,8,16,24,0,8,16,24,0,8,16,24); + v128_t cd = wasm_v8x16_shuffle(c1, d1, 0,8,16,24,0,8,16,24,0,8,16,24,0,8,16,24); + v128_t ef = wasm_v8x16_shuffle(e1, f1, 0,8,16,24,0,8,16,24,0,8,16,24,0,8,16,24); + v128_t gh = wasm_v8x16_shuffle(g1, h1, 0,8,16,24,0,8,16,24,0,8,16,24,0,8,16,24); + v128_t abcd = wasm_v8x16_shuffle(ab, cd, 0,1,2,3,16,17,18,19,0,1,2,3,16,17,18,19); + v128_t efgh = wasm_v8x16_shuffle(ef, gh, 0,1,2,3,16,17,18,19,0,1,2,3,16,17,18,19); + return v_uint8x16(wasm_v8x16_shuffle(abcd, efgh, 0,1,2,3,4,5,6,7,16,17,18,19,20,21,22,23)); +} + +inline v_float32x4 v_matmul(const v_float32x4& v, const v_float32x4& m0, + const v_float32x4& m1, const v_float32x4& m2, + const v_float32x4& m3) +{ + v128_t v0 = wasm_f32x4_splat(wasm_f32x4_extract_lane(v.val, 0)); + v128_t v1 = wasm_f32x4_splat(wasm_f32x4_extract_lane(v.val, 1)); + v128_t v2 = wasm_f32x4_splat(wasm_f32x4_extract_lane(v.val, 2)); + v128_t v3 = wasm_f32x4_splat(wasm_f32x4_extract_lane(v.val, 3)); + v0 = wasm_f32x4_mul(v0, m0.val); + v1 = wasm_f32x4_mul(v1, m1.val); + v2 = wasm_f32x4_mul(v2, m2.val); + v3 = wasm_f32x4_mul(v3, m3.val); + + return v_float32x4(wasm_f32x4_add(wasm_f32x4_add(v0, v1), wasm_f32x4_add(v2, v3))); +} + +inline v_float32x4 v_matmuladd(const v_float32x4& v, const v_float32x4& m0, + const v_float32x4& m1, const v_float32x4& m2, + const v_float32x4& a) +{ + v128_t v0 = wasm_f32x4_splat(wasm_f32x4_extract_lane(v.val, 0)); + v128_t v1 = wasm_f32x4_splat(wasm_f32x4_extract_lane(v.val, 1)); + v128_t v2 = wasm_f32x4_splat(wasm_f32x4_extract_lane(v.val, 2)); + v0 = wasm_f32x4_mul(v0, m0.val); + v1 = wasm_f32x4_mul(v1, m1.val); + v2 = wasm_f32x4_mul(v2, m2.val); + + return v_float32x4(wasm_f32x4_add(wasm_f32x4_add(v0, v1), wasm_f32x4_add(v2, a.val))); +} + +#define OPENCV_HAL_IMPL_WASM_BIN_OP(bin_op, _Tpvec, intrin) \ +inline _Tpvec operator bin_op (const _Tpvec& a, const _Tpvec& b) \ +{ \ + return _Tpvec(intrin(a.val, b.val)); \ +} \ +inline _Tpvec& operator bin_op##= (_Tpvec& a, const _Tpvec& b) \ +{ \ + a.val = intrin(a.val, b.val); \ + return a; \ +} + +OPENCV_HAL_IMPL_WASM_BIN_OP(+, v_uint8x16, wasm_u8x16_add_saturate) +OPENCV_HAL_IMPL_WASM_BIN_OP(-, v_uint8x16, wasm_u8x16_sub_saturate) +OPENCV_HAL_IMPL_WASM_BIN_OP(+, v_int8x16, wasm_i8x16_add_saturate) +OPENCV_HAL_IMPL_WASM_BIN_OP(-, v_int8x16, wasm_i8x16_sub_saturate) +OPENCV_HAL_IMPL_WASM_BIN_OP(+, v_uint16x8, wasm_u16x8_add_saturate) +OPENCV_HAL_IMPL_WASM_BIN_OP(-, v_uint16x8, wasm_u16x8_sub_saturate) +OPENCV_HAL_IMPL_WASM_BIN_OP(+, v_int16x8, wasm_i16x8_add_saturate) +OPENCV_HAL_IMPL_WASM_BIN_OP(-, v_int16x8, wasm_i16x8_sub_saturate) +OPENCV_HAL_IMPL_WASM_BIN_OP(+, v_uint32x4, wasm_i32x4_add) +OPENCV_HAL_IMPL_WASM_BIN_OP(-, v_uint32x4, wasm_i32x4_sub) +OPENCV_HAL_IMPL_WASM_BIN_OP(*, v_uint32x4, wasm_i32x4_mul) +OPENCV_HAL_IMPL_WASM_BIN_OP(+, v_int32x4, wasm_i32x4_add) +OPENCV_HAL_IMPL_WASM_BIN_OP(-, v_int32x4, wasm_i32x4_sub) +OPENCV_HAL_IMPL_WASM_BIN_OP(*, v_int32x4, wasm_i32x4_mul) +OPENCV_HAL_IMPL_WASM_BIN_OP(+, v_float32x4, wasm_f32x4_add) +OPENCV_HAL_IMPL_WASM_BIN_OP(-, v_float32x4, wasm_f32x4_sub) +OPENCV_HAL_IMPL_WASM_BIN_OP(*, v_float32x4, wasm_f32x4_mul) +OPENCV_HAL_IMPL_WASM_BIN_OP(/, v_float32x4, wasm_f32x4_div) +OPENCV_HAL_IMPL_WASM_BIN_OP(+, v_uint64x2, wasm_i64x2_add) +OPENCV_HAL_IMPL_WASM_BIN_OP(-, v_uint64x2, wasm_i64x2_sub) +OPENCV_HAL_IMPL_WASM_BIN_OP(+, v_int64x2, wasm_i64x2_add) +OPENCV_HAL_IMPL_WASM_BIN_OP(-, v_int64x2, wasm_i64x2_sub) +OPENCV_HAL_IMPL_WASM_BIN_OP(+, v_float64x2, wasm_f64x2_add) +OPENCV_HAL_IMPL_WASM_BIN_OP(-, v_float64x2, wasm_f64x2_sub) +OPENCV_HAL_IMPL_WASM_BIN_OP(*, v_float64x2, wasm_f64x2_mul) +OPENCV_HAL_IMPL_WASM_BIN_OP(/, v_float64x2, wasm_f64x2_div) + +// saturating multiply 8-bit, 16-bit +#define OPENCV_HAL_IMPL_WASM_MUL_SAT(_Tpvec, _Tpwvec) \ +inline _Tpvec operator * (const _Tpvec& a, const _Tpvec& b) \ +{ \ + _Tpwvec c, d; \ + v_mul_expand(a, b, c, d); \ + return v_pack(c, d); \ +} \ +inline _Tpvec& operator *= (_Tpvec& a, const _Tpvec& b) \ +{ a = a * b; return a; } + +OPENCV_HAL_IMPL_WASM_MUL_SAT(v_uint8x16, v_uint16x8) +OPENCV_HAL_IMPL_WASM_MUL_SAT(v_int8x16, v_int16x8) +OPENCV_HAL_IMPL_WASM_MUL_SAT(v_uint16x8, v_uint32x4) +OPENCV_HAL_IMPL_WASM_MUL_SAT(v_int16x8, v_int32x4) + +// Multiply and expand +inline void v_mul_expand(const v_uint8x16& a, const v_uint8x16& b, + v_uint16x8& c, v_uint16x8& d) +{ + v_uint16x8 a0, a1, b0, b1; + v_expand(a, a0, a1); + v_expand(b, b0, b1); + c = v_mul_wrap(a0, b0); + d = v_mul_wrap(a1, b1); +} + +inline void v_mul_expand(const v_int8x16& a, const v_int8x16& b, + v_int16x8& c, v_int16x8& d) +{ + v_int16x8 a0, a1, b0, b1; + v_expand(a, a0, a1); + v_expand(b, b0, b1); + c = v_mul_wrap(a0, b0); + d = v_mul_wrap(a1, b1); +} + +inline void v_mul_expand(const v_int16x8& a, const v_int16x8& b, + v_int32x4& c, v_int32x4& d) +{ + v_int32x4 a0, a1, b0, b1; + v_expand(a, a0, a1); + v_expand(b, b0, b1); + c.val = wasm_i32x4_mul(a0.val, b0.val); + d.val = wasm_i32x4_mul(a1.val, b1.val); +} + +inline void v_mul_expand(const v_uint16x8& a, const v_uint16x8& b, + v_uint32x4& c, v_uint32x4& d) +{ + v_uint32x4 a0, a1, b0, b1; + v_expand(a, a0, a1); + v_expand(b, b0, b1); + c.val = wasm_i32x4_mul(a0.val, b0.val); + d.val = wasm_i32x4_mul(a1.val, b1.val); +} + +inline void v_mul_expand(const v_uint32x4& a, const v_uint32x4& b, + v_uint64x2& c, v_uint64x2& d) +{ + v_uint64x2 a0, a1, b0, b1; + v_expand(a, a0, a1); + v_expand(b, b0, b1); + c.val = ((__u64x2)(a0.val) * (__u64x2)(b0.val)); + d.val = ((__u64x2)(a1.val) * (__u64x2)(b1.val)); +} + +inline v_int16x8 v_mul_hi(const v_int16x8& a, const v_int16x8& b) +{ + v_int32x4 a0, a1, b0, b1; + v_expand(a, a0, a1); + v_expand(b, b0, b1); + v128_t c = wasm_i32x4_mul(a0.val, b0.val); + v128_t d = wasm_i32x4_mul(a1.val, b1.val); + return v_int16x8(wasm_v8x16_shuffle(c, d, 2,3,6,7,10,11,14,15,18,19,22,23,26,27,30,31)); +} +inline v_uint16x8 v_mul_hi(const v_uint16x8& a, const v_uint16x8& b) +{ + v_uint32x4 a0, a1, b0, b1; + v_expand(a, a0, a1); + v_expand(b, b0, b1); + v128_t c = wasm_i32x4_mul(a0.val, b0.val); + v128_t d = wasm_i32x4_mul(a1.val, b1.val); + return v_uint16x8(wasm_v8x16_shuffle(c, d, 2,3,6,7,10,11,14,15,18,19,22,23,26,27,30,31)); +} + +//////// Dot Product //////// + +inline v_int32x4 v_dotprod(const v_int16x8& a, const v_int16x8& b) +{ + v128_t a0 = wasm_i32x4_shr(wasm_i32x4_shl(a.val, 16), 16); + v128_t a1 = wasm_i32x4_shr(a.val, 16); + v128_t b0 = wasm_i32x4_shr(wasm_i32x4_shl(b.val, 16), 16); + v128_t b1 = wasm_i32x4_shr(b.val, 16); + v128_t c = wasm_i32x4_mul(a0, b0); + v128_t d = wasm_i32x4_mul(a1, b1); + return v_int32x4(wasm_i32x4_add(c, d)); +} + +inline v_int32x4 v_dotprod(const v_int16x8& a, const v_int16x8& b, const v_int32x4& c) +{ return v_dotprod(a, b) + c; } + +inline v_int64x2 v_dotprod(const v_int32x4& a, const v_int32x4& b) +{ + v128_t a0 = wasm_i64x2_shr(wasm_i64x2_shl(a.val, 32), 32); + v128_t a1 = wasm_i64x2_shr(a.val, 32); + v128_t b0 = wasm_i64x2_shr(wasm_i64x2_shl(b.val, 32), 32); + v128_t b1 = wasm_i64x2_shr(b.val, 32); + v128_t c = (v128_t)((__i64x2)a0 * (__i64x2)b0); + v128_t d = (v128_t)((__i64x2)a1 * (__i64x2)b1); + return v_int64x2(wasm_i64x2_add(c, d)); +} +inline v_int64x2 v_dotprod(const v_int32x4& a, const v_int32x4& b, const v_int64x2& c) +{ + return v_dotprod(a, b) + c; +} + +// 8 >> 32 +inline v_uint32x4 v_dotprod_expand(const v_uint8x16& a, const v_uint8x16& b) +{ + v128_t a0 = wasm_u16x8_shr(wasm_i16x8_shl(a.val, 8), 8); + v128_t a1 = wasm_u16x8_shr(a.val, 8); + v128_t b0 = wasm_u16x8_shr(wasm_i16x8_shl(b.val, 8), 8); + v128_t b1 = wasm_u16x8_shr(b.val, 8); + return v_uint32x4(( + v_dotprod(v_int16x8(a0), v_int16x8(b0)) + + v_dotprod(v_int16x8(a1), v_int16x8(b1))).val + ); +} +inline v_uint32x4 v_dotprod_expand(const v_uint8x16& a, const v_uint8x16& b, const v_uint32x4& c) +{ return v_dotprod_expand(a, b) + c; } + +inline v_int32x4 v_dotprod_expand(const v_int8x16& a, const v_int8x16& b) +{ + v128_t a0 = wasm_i16x8_shr(wasm_i16x8_shl(a.val, 8), 8); + v128_t a1 = wasm_i16x8_shr(a.val, 8); + v128_t b0 = wasm_i16x8_shr(wasm_i16x8_shl(b.val, 8), 8); + v128_t b1 = wasm_i16x8_shr(b.val, 8); + return v_int32x4( + v_dotprod(v_int16x8(a0), v_int16x8(b0)) + + v_dotprod(v_int16x8(a1), v_int16x8(b1)) + ); +} +inline v_int32x4 v_dotprod_expand(const v_int8x16& a, const v_int8x16& b, const v_int32x4& c) +{ return v_dotprod_expand(a, b) + c; } + +// 16 >> 64 +inline v_uint64x2 v_dotprod_expand(const v_uint16x8& a, const v_uint16x8& b) +{ + v128_t a0 = wasm_u32x4_shr(wasm_i32x4_shl(a.val, 16), 16); + v128_t a1 = wasm_u32x4_shr(a.val, 16); + v128_t b0 = wasm_u32x4_shr(wasm_i32x4_shl(b.val, 16), 16); + v128_t b1 = wasm_u32x4_shr(b.val, 16); + return v_uint64x2(( + v_dotprod(v_int32x4(a0), v_int32x4(b0)) + + v_dotprod(v_int32x4(a1), v_int32x4(b1))).val + ); +} +inline v_uint64x2 v_dotprod_expand(const v_uint16x8& a, const v_uint16x8& b, const v_uint64x2& c) +{ return v_dotprod_expand(a, b) + c; } + +inline v_int64x2 v_dotprod_expand(const v_int16x8& a, const v_int16x8& b) +{ + v128_t a0 = wasm_i32x4_shr(wasm_i32x4_shl(a.val, 16), 16); + v128_t a1 = wasm_i32x4_shr(a.val, 16); + v128_t b0 = wasm_i32x4_shr(wasm_i32x4_shl(b.val, 16), 16); + v128_t b1 = wasm_i32x4_shr(b.val, 16); + return v_int64x2(( + v_dotprod(v_int32x4(a0), v_int32x4(b0)) + + v_dotprod(v_int32x4(a1), v_int32x4(b1))) + ); +} + +inline v_int64x2 v_dotprod_expand(const v_int16x8& a, const v_int16x8& b, const v_int64x2& c) +{ return v_dotprod_expand(a, b) + c; } + +// 32 >> 64f +inline v_float64x2 v_dotprod_expand(const v_int32x4& a, const v_int32x4& b) +{ return v_cvt_f64(v_dotprod(a, b)); } +inline v_float64x2 v_dotprod_expand(const v_int32x4& a, const v_int32x4& b, const v_float64x2& c) +{ return v_dotprod_expand(a, b) + c; } + +//////// Fast Dot Product //////// + +// 16 >> 32 +inline v_int32x4 v_dotprod_fast(const v_int16x8& a, const v_int16x8& b) +{ return v_dotprod(a, b); } +inline v_int32x4 v_dotprod_fast(const v_int16x8& a, const v_int16x8& b, const v_int32x4& c) +{ return v_dotprod(a, b, c); } + +// 32 >> 64 +inline v_int64x2 v_dotprod_fast(const v_int32x4& a, const v_int32x4& b) +{ return v_dotprod(a, b); } +inline v_int64x2 v_dotprod_fast(const v_int32x4& a, const v_int32x4& b, const v_int64x2& c) +{ return v_dotprod(a, b, c); } + +// 8 >> 32 +inline v_uint32x4 v_dotprod_expand_fast(const v_uint8x16& a, const v_uint8x16& b) +{ return v_dotprod_expand(a, b); } +inline v_uint32x4 v_dotprod_expand_fast(const v_uint8x16& a, const v_uint8x16& b, const v_uint32x4& c) +{ return v_dotprod_expand(a, b, c); } +inline v_int32x4 v_dotprod_expand_fast(const v_int8x16& a, const v_int8x16& b) +{ return v_dotprod_expand(a, b); } +inline v_int32x4 v_dotprod_expand_fast(const v_int8x16& a, const v_int8x16& b, const v_int32x4& c) +{ return v_dotprod_expand(a, b, c); } + +// 16 >> 64 +inline v_uint64x2 v_dotprod_expand_fast(const v_uint16x8& a, const v_uint16x8& b) +{ return v_dotprod_expand(a, b); } +inline v_uint64x2 v_dotprod_expand_fast(const v_uint16x8& a, const v_uint16x8& b, const v_uint64x2& c) +{ return v_dotprod_expand(a, b, c); } +inline v_int64x2 v_dotprod_expand_fast(const v_int16x8& a, const v_int16x8& b) +{ return v_dotprod_expand(a, b); } +inline v_int64x2 v_dotprod_expand_fast(const v_int16x8& a, const v_int16x8& b, const v_int64x2& c) +{ return v_dotprod_expand(a, b, c); } + +// 32 >> 64f +inline v_float64x2 v_dotprod_expand_fast(const v_int32x4& a, const v_int32x4& b) +{ return v_dotprod_expand(a, b); } +inline v_float64x2 v_dotprod_expand_fast(const v_int32x4& a, const v_int32x4& b, const v_float64x2& c) +{ return v_dotprod_expand(a, b, c); } + +#define OPENCV_HAL_IMPL_WASM_LOGIC_OP(_Tpvec) \ +OPENCV_HAL_IMPL_WASM_BIN_OP(&, _Tpvec, wasm_v128_and) \ +OPENCV_HAL_IMPL_WASM_BIN_OP(|, _Tpvec, wasm_v128_or) \ +OPENCV_HAL_IMPL_WASM_BIN_OP(^, _Tpvec, wasm_v128_xor) \ +inline _Tpvec operator ~ (const _Tpvec& a) \ +{ \ + return _Tpvec(wasm_v128_not(a.val)); \ +} + +OPENCV_HAL_IMPL_WASM_LOGIC_OP(v_uint8x16) +OPENCV_HAL_IMPL_WASM_LOGIC_OP(v_int8x16) +OPENCV_HAL_IMPL_WASM_LOGIC_OP(v_uint16x8) +OPENCV_HAL_IMPL_WASM_LOGIC_OP(v_int16x8) +OPENCV_HAL_IMPL_WASM_LOGIC_OP(v_uint32x4) +OPENCV_HAL_IMPL_WASM_LOGIC_OP(v_int32x4) +OPENCV_HAL_IMPL_WASM_LOGIC_OP(v_uint64x2) +OPENCV_HAL_IMPL_WASM_LOGIC_OP(v_int64x2) +OPENCV_HAL_IMPL_WASM_LOGIC_OP(v_float32x4) +OPENCV_HAL_IMPL_WASM_LOGIC_OP(v_float64x2) + +inline v_float32x4 v_sqrt(const v_float32x4& x) +{ + return v_float32x4(wasm_f32x4_sqrt(x.val)); +} + +inline v_float32x4 v_invsqrt(const v_float32x4& x) +{ + const v128_t _1_0 = wasm_f32x4_splat(1.0); + return v_float32x4(wasm_f32x4_div(_1_0, wasm_f32x4_sqrt(x.val))); +} + +inline v_float64x2 v_sqrt(const v_float64x2& x) +{ + return v_float64x2(wasm_f64x2_sqrt(x.val)); +} + +inline v_float64x2 v_invsqrt(const v_float64x2& x) +{ + const v128_t _1_0 = wasm_f64x2_splat(1.0); + return v_float64x2(wasm_f64x2_div(_1_0, wasm_f64x2_sqrt(x.val))); +} + +#define OPENCV_HAL_IMPL_WASM_ABS_INT_FUNC(_Tpuvec, _Tpsvec, suffix, zsuffix, shiftWidth) \ +inline _Tpuvec v_abs(const _Tpsvec& x) \ +{ \ + v128_t s = wasm_##suffix##_shr(x.val, shiftWidth); \ + v128_t f = wasm_##zsuffix##_shr(x.val, shiftWidth); \ + return _Tpuvec(wasm_##zsuffix##_add(wasm_v128_xor(x.val, f), s)); \ +} + +OPENCV_HAL_IMPL_WASM_ABS_INT_FUNC(v_uint8x16, v_int8x16, u8x16, i8x16, 7) +OPENCV_HAL_IMPL_WASM_ABS_INT_FUNC(v_uint16x8, v_int16x8, u16x8, i16x8, 15) +OPENCV_HAL_IMPL_WASM_ABS_INT_FUNC(v_uint32x4, v_int32x4, u32x4, i32x4, 31) + +inline v_float32x4 v_abs(const v_float32x4& x) +{ return v_float32x4(wasm_f32x4_abs(x.val)); } +inline v_float64x2 v_abs(const v_float64x2& x) +{ + return v_float64x2(wasm_f64x2_abs(x.val)); +} + +// TODO: exp, log, sin, cos + +#define OPENCV_HAL_IMPL_WASM_BIN_FUNC(_Tpvec, func, intrin) \ +inline _Tpvec func(const _Tpvec& a, const _Tpvec& b) \ +{ \ + return _Tpvec(intrin(a.val, b.val)); \ +} + +#define OPENCV_HAL_IMPL_WASM_BIN_FUNC_FALLBACK(_Tpvec, func, intrin) \ +inline _Tpvec func(const _Tpvec& a, const _Tpvec& b) \ +{ \ + fallback::_Tpvec a_(a); \ + fallback::_Tpvec b_(b); \ + return _Tpvec(fallback::func(a_, b_)); \ +} + +OPENCV_HAL_IMPL_WASM_BIN_FUNC(v_float32x4, v_min, wasm_f32x4_min) +OPENCV_HAL_IMPL_WASM_BIN_FUNC(v_float32x4, v_max, wasm_f32x4_max) +OPENCV_HAL_IMPL_WASM_BIN_FUNC(v_float64x2, v_min, wasm_f64x2_min) +OPENCV_HAL_IMPL_WASM_BIN_FUNC(v_float64x2, v_max, wasm_f64x2_max) + +#define OPENCV_HAL_IMPL_WASM_MINMAX_S_INIT_FUNC(_Tpvec, suffix) \ +inline _Tpvec v_min(const _Tpvec& a, const _Tpvec& b) \ +{ \ + return _Tpvec(wasm_v128_bitselect(b.val, a.val, wasm_##suffix##_gt(a.val, b.val))); \ +} \ +inline _Tpvec v_max(const _Tpvec& a, const _Tpvec& b) \ +{ \ + return _Tpvec(wasm_v128_bitselect(a.val, b.val, wasm_##suffix##_gt(a.val, b.val))); \ +} + +OPENCV_HAL_IMPL_WASM_MINMAX_S_INIT_FUNC(v_int8x16, i8x16) +OPENCV_HAL_IMPL_WASM_MINMAX_S_INIT_FUNC(v_int16x8, i16x8) +OPENCV_HAL_IMPL_WASM_MINMAX_S_INIT_FUNC(v_int32x4, i32x4) + +#define OPENCV_HAL_IMPL_WASM_MINMAX_U_INIT_FUNC(_Tpvec, suffix, deltaNum) \ +inline _Tpvec v_min(const _Tpvec& a, const _Tpvec& b) \ +{ \ + v128_t delta = wasm_##suffix##_splat(deltaNum); \ + v128_t mask = wasm_##suffix##_gt(wasm_v128_xor(a.val, delta), wasm_v128_xor(b.val, delta)); \ + return _Tpvec(wasm_v128_bitselect(b.val, a.val, mask)); \ +} \ +inline _Tpvec v_max(const _Tpvec& a, const _Tpvec& b) \ +{ \ + v128_t delta = wasm_##suffix##_splat(deltaNum); \ + v128_t mask = wasm_##suffix##_gt(wasm_v128_xor(a.val, delta), wasm_v128_xor(b.val, delta)); \ + return _Tpvec(wasm_v128_bitselect(a.val, b.val, mask)); \ +} + +OPENCV_HAL_IMPL_WASM_MINMAX_U_INIT_FUNC(v_uint8x16, i8x16, (schar)0x80) +OPENCV_HAL_IMPL_WASM_MINMAX_U_INIT_FUNC(v_uint16x8, i16x8, (short)0x8000) +OPENCV_HAL_IMPL_WASM_MINMAX_U_INIT_FUNC(v_uint32x4, i32x4, (int)0x80000000) + +#define OPENCV_HAL_IMPL_WASM_INIT_CMP_OP(_Tpvec, suffix, esuffix) \ +inline _Tpvec operator == (const _Tpvec& a, const _Tpvec& b) \ +{ return _Tpvec(wasm_##esuffix##_eq(a.val, b.val)); } \ +inline _Tpvec operator != (const _Tpvec& a, const _Tpvec& b) \ +{ return _Tpvec(wasm_##esuffix##_ne(a.val, b.val)); } \ +inline _Tpvec operator < (const _Tpvec& a, const _Tpvec& b) \ +{ return _Tpvec(wasm_##suffix##_lt(a.val, b.val)); } \ +inline _Tpvec operator > (const _Tpvec& a, const _Tpvec& b) \ +{ return _Tpvec(wasm_##suffix##_gt(a.val, b.val)); } \ +inline _Tpvec operator <= (const _Tpvec& a, const _Tpvec& b) \ +{ return _Tpvec(wasm_##suffix##_le(a.val, b.val)); } \ +inline _Tpvec operator >= (const _Tpvec& a, const _Tpvec& b) \ +{ return _Tpvec(wasm_##suffix##_ge(a.val, b.val)); } + +OPENCV_HAL_IMPL_WASM_INIT_CMP_OP(v_uint8x16, u8x16, i8x16) +OPENCV_HAL_IMPL_WASM_INIT_CMP_OP(v_int8x16, i8x16, i8x16) +OPENCV_HAL_IMPL_WASM_INIT_CMP_OP(v_uint16x8, u16x8, i16x8) +OPENCV_HAL_IMPL_WASM_INIT_CMP_OP(v_int16x8, i16x8, i16x8) +OPENCV_HAL_IMPL_WASM_INIT_CMP_OP(v_uint32x4, u32x4, i32x4) +OPENCV_HAL_IMPL_WASM_INIT_CMP_OP(v_int32x4, i32x4, i32x4) +OPENCV_HAL_IMPL_WASM_INIT_CMP_OP(v_float32x4, f32x4, f32x4) +OPENCV_HAL_IMPL_WASM_INIT_CMP_OP(v_float64x2, f64x2, f64x2) + +#define OPENCV_HAL_IMPL_WASM_64BIT_CMP_OP(_Tpvec, cast) \ +inline _Tpvec operator == (const _Tpvec& a, const _Tpvec& b) \ +{ return cast(v_reinterpret_as_f64(a) == v_reinterpret_as_f64(b)); } \ +inline _Tpvec operator != (const _Tpvec& a, const _Tpvec& b) \ +{ return cast(v_reinterpret_as_f64(a) != v_reinterpret_as_f64(b)); } + +OPENCV_HAL_IMPL_WASM_64BIT_CMP_OP(v_uint64x2, v_reinterpret_as_u64) +OPENCV_HAL_IMPL_WASM_64BIT_CMP_OP(v_int64x2, v_reinterpret_as_s64) + +inline v_float32x4 v_not_nan(const v_float32x4& a) +{ + v128_t z = wasm_i32x4_splat(0x7fffffff); + v128_t t = wasm_i32x4_splat(0x7f800000); + return v_float32x4(wasm_u32x4_lt(wasm_v128_and(a.val, z), t)); +} +inline v_float64x2 v_not_nan(const v_float64x2& a) +{ + v128_t z = wasm_i64x2_splat(0x7fffffffffffffff); + v128_t t = wasm_i64x2_splat(0x7ff0000000000000); + return v_float64x2((__u64x2)(wasm_v128_and(a.val, z)) < (__u64x2)t); +} + +OPENCV_HAL_IMPL_WASM_BIN_FUNC(v_uint8x16, v_add_wrap, wasm_i8x16_add) +OPENCV_HAL_IMPL_WASM_BIN_FUNC(v_int8x16, v_add_wrap, wasm_i8x16_add) +OPENCV_HAL_IMPL_WASM_BIN_FUNC(v_uint16x8, v_add_wrap, wasm_i16x8_add) +OPENCV_HAL_IMPL_WASM_BIN_FUNC(v_int16x8, v_add_wrap, wasm_i16x8_add) +OPENCV_HAL_IMPL_WASM_BIN_FUNC(v_uint8x16, v_sub_wrap, wasm_i8x16_sub) +OPENCV_HAL_IMPL_WASM_BIN_FUNC(v_int8x16, v_sub_wrap, wasm_i8x16_sub) +OPENCV_HAL_IMPL_WASM_BIN_FUNC(v_uint16x8, v_sub_wrap, wasm_i16x8_sub) +OPENCV_HAL_IMPL_WASM_BIN_FUNC(v_int16x8, v_sub_wrap, wasm_i16x8_sub) +#if (__EMSCRIPTEN_major__ * 1000000 + __EMSCRIPTEN_minor__ * 1000 + __EMSCRIPTEN_tiny__) >= (2000000) +// details: https://github.com/opencv/opencv/issues/18097 ( https://github.com/emscripten-core/emscripten/issues/12018 ) +OPENCV_HAL_IMPL_WASM_BIN_FUNC_FALLBACK(v_uint8x16, v_mul_wrap, wasm_i8x16_mul) +OPENCV_HAL_IMPL_WASM_BIN_FUNC_FALLBACK(v_int8x16, v_mul_wrap, wasm_i8x16_mul) +#else +OPENCV_HAL_IMPL_WASM_BIN_FUNC(v_uint8x16, v_mul_wrap, wasm_i8x16_mul) +OPENCV_HAL_IMPL_WASM_BIN_FUNC(v_int8x16, v_mul_wrap, wasm_i8x16_mul) +#endif +OPENCV_HAL_IMPL_WASM_BIN_FUNC(v_uint16x8, v_mul_wrap, wasm_i16x8_mul) +OPENCV_HAL_IMPL_WASM_BIN_FUNC(v_int16x8, v_mul_wrap, wasm_i16x8_mul) + + +/** Absolute difference **/ + +inline v_uint8x16 v_absdiff(const v_uint8x16& a, const v_uint8x16& b) +{ return v_add_wrap(a - b, b - a); } +inline v_uint16x8 v_absdiff(const v_uint16x8& a, const v_uint16x8& b) +{ return v_add_wrap(a - b, b - a); } +inline v_uint32x4 v_absdiff(const v_uint32x4& a, const v_uint32x4& b) +{ return v_max(a, b) - v_min(a, b); } + +inline v_uint8x16 v_absdiff(const v_int8x16& a, const v_int8x16& b) +{ + v_int8x16 d = v_sub_wrap(a, b); + v_int8x16 m = a < b; + return v_reinterpret_as_u8(v_sub_wrap(d ^ m, m)); +} +inline v_uint16x8 v_absdiff(const v_int16x8& a, const v_int16x8& b) +{ + return v_reinterpret_as_u16(v_sub_wrap(v_max(a, b), v_min(a, b))); +} +inline v_uint32x4 v_absdiff(const v_int32x4& a, const v_int32x4& b) +{ + v_int32x4 d = a - b; + v_int32x4 m = a < b; + return v_reinterpret_as_u32((d ^ m) - m); +} + +/** Saturating absolute difference **/ +inline v_int8x16 v_absdiffs(const v_int8x16& a, const v_int8x16& b) +{ + v_int8x16 d = a - b; + v_int8x16 m = a < b; + return (d ^ m) - m; + } +inline v_int16x8 v_absdiffs(const v_int16x8& a, const v_int16x8& b) +{ return v_max(a, b) - v_min(a, b); } + + +inline v_int32x4 v_fma(const v_int32x4& a, const v_int32x4& b, const v_int32x4& c) +{ + return a * b + c; +} + +inline v_int32x4 v_muladd(const v_int32x4& a, const v_int32x4& b, const v_int32x4& c) +{ + return v_fma(a, b, c); +} + +inline v_float32x4 v_fma(const v_float32x4& a, const v_float32x4& b, const v_float32x4& c) +{ + return a * b + c; +} + +inline v_float64x2 v_fma(const v_float64x2& a, const v_float64x2& b, const v_float64x2& c) +{ + return a * b + c; +} + +inline v_float32x4 v_absdiff(const v_float32x4& a, const v_float32x4& b) +{ + v128_t absmask_vec = wasm_i32x4_splat(0x7fffffff); + return v_float32x4(wasm_v128_and(wasm_f32x4_sub(a.val, b.val), absmask_vec)); +} +inline v_float64x2 v_absdiff(const v_float64x2& a, const v_float64x2& b) +{ + v128_t absmask_vec = wasm_u64x2_shr(wasm_i32x4_splat(-1), 1); + return v_float64x2(wasm_v128_and(wasm_f64x2_sub(a.val, b.val), absmask_vec)); +} + +#define OPENCV_HAL_IMPL_WASM_MISC_FLT_OP(_Tpvec, suffix) \ +inline _Tpvec v_magnitude(const _Tpvec& a, const _Tpvec& b) \ +{ \ + v128_t a_Square = wasm_##suffix##_mul(a.val, a.val); \ + v128_t b_Square = wasm_##suffix##_mul(b.val, b.val); \ + return _Tpvec(wasm_##suffix##_sqrt(wasm_##suffix##_add(a_Square, b_Square))); \ +} \ +inline _Tpvec v_sqr_magnitude(const _Tpvec& a, const _Tpvec& b) \ +{ \ + v128_t a_Square = wasm_##suffix##_mul(a.val, a.val); \ + v128_t b_Square = wasm_##suffix##_mul(b.val, b.val); \ + return _Tpvec(wasm_##suffix##_add(a_Square, b_Square)); \ +} \ +inline _Tpvec v_muladd(const _Tpvec& a, const _Tpvec& b, const _Tpvec& c) \ +{ \ + return _Tpvec(wasm_##suffix##_add(wasm_##suffix##_mul(a.val, b.val), c.val)); \ +} + +OPENCV_HAL_IMPL_WASM_MISC_FLT_OP(v_float32x4, f32x4) +OPENCV_HAL_IMPL_WASM_MISC_FLT_OP(v_float64x2, f64x2) + +#define OPENCV_HAL_IMPL_WASM_SHIFT_OP(_Tpuvec, _Tpsvec, suffix, ssuffix) \ +inline _Tpuvec operator << (const _Tpuvec& a, int imm) \ +{ \ + return _Tpuvec(wasm_##suffix##_shl(a.val, imm)); \ +} \ +inline _Tpsvec operator << (const _Tpsvec& a, int imm) \ +{ \ + return _Tpsvec(wasm_##suffix##_shl(a.val, imm)); \ +} \ +inline _Tpuvec operator >> (const _Tpuvec& a, int imm) \ +{ \ + return _Tpuvec(wasm_##ssuffix##_shr(a.val, imm)); \ +} \ +inline _Tpsvec operator >> (const _Tpsvec& a, int imm) \ +{ \ + return _Tpsvec(wasm_##suffix##_shr(a.val, imm)); \ +} \ +template \ +inline _Tpuvec v_shl(const _Tpuvec& a) \ +{ \ + return _Tpuvec(wasm_##suffix##_shl(a.val, imm)); \ +} \ +template \ +inline _Tpsvec v_shl(const _Tpsvec& a) \ +{ \ + return _Tpsvec(wasm_##suffix##_shl(a.val, imm)); \ +} \ +template \ +inline _Tpuvec v_shr(const _Tpuvec& a) \ +{ \ + return _Tpuvec(wasm_##ssuffix##_shr(a.val, imm)); \ +} \ +template \ +inline _Tpsvec v_shr(const _Tpsvec& a) \ +{ \ + return _Tpsvec(wasm_##suffix##_shr(a.val, imm)); \ +} + +OPENCV_HAL_IMPL_WASM_SHIFT_OP(v_uint8x16, v_int8x16, i8x16, u8x16) +OPENCV_HAL_IMPL_WASM_SHIFT_OP(v_uint16x8, v_int16x8, i16x8, u16x8) +OPENCV_HAL_IMPL_WASM_SHIFT_OP(v_uint32x4, v_int32x4, i32x4, u32x4) +OPENCV_HAL_IMPL_WASM_SHIFT_OP(v_uint64x2, v_int64x2, i64x2, u64x2) + +namespace hal_wasm_internal +{ + template 16)), + bool is_first = (imm == 0), + bool is_second = (imm == 16), + bool is_other = (((imm > 0) && (imm < 16)))> + class v_wasm_palignr_u8_class; + + template + class v_wasm_palignr_u8_class; + + template + class v_wasm_palignr_u8_class + { + public: + inline v128_t operator()(const v128_t& a, const v128_t&) const + { + return a; + } + }; + + template + class v_wasm_palignr_u8_class + { + public: + inline v128_t operator()(const v128_t&, const v128_t& b) const + { + return b; + } + }; + + template + class v_wasm_palignr_u8_class + { + public: + inline v128_t operator()(const v128_t& a, const v128_t& b) const + { + enum { imm2 = (sizeof(v128_t) - imm) }; + return wasm_v8x16_shuffle(a, b, + imm, imm+1, imm+2, imm+3, + imm+4, imm+5, imm+6, imm+7, + imm+8, imm+9, imm+10, imm+11, + imm+12, imm+13, imm+14, imm+15); + } + }; + + template + inline v128_t v_wasm_palignr_u8(const v128_t& a, const v128_t& b) + { + CV_StaticAssert((imm >= 0) && (imm <= 16), "Invalid imm for v_wasm_palignr_u8."); + return v_wasm_palignr_u8_class()(a, b); + } +} + +template +inline _Tpvec v_rotate_right(const _Tpvec &a) +{ + using namespace hal_wasm_internal; + enum { imm2 = (imm * sizeof(typename _Tpvec::lane_type)) }; + v128_t z = wasm_i8x16_splat(0); + return _Tpvec(v_wasm_palignr_u8(a.val, z)); +} + +template +inline _Tpvec v_rotate_left(const _Tpvec &a) +{ + using namespace hal_wasm_internal; + enum { imm2 = ((_Tpvec::nlanes - imm) * sizeof(typename _Tpvec::lane_type)) }; + v128_t z = wasm_i8x16_splat(0); + return _Tpvec(v_wasm_palignr_u8(z, a.val)); +} + +template +inline _Tpvec v_rotate_right(const _Tpvec &a, const _Tpvec &b) +{ + using namespace hal_wasm_internal; + enum { imm2 = (imm * sizeof(typename _Tpvec::lane_type)) }; + return _Tpvec(v_wasm_palignr_u8(a.val, b.val)); +} + +template +inline _Tpvec v_rotate_left(const _Tpvec &a, const _Tpvec &b) +{ + using namespace hal_wasm_internal; + enum { imm2 = ((_Tpvec::nlanes - imm) * sizeof(typename _Tpvec::lane_type)) }; + return _Tpvec(v_wasm_palignr_u8(b.val, a.val)); +} + +#define OPENCV_HAL_IMPL_WASM_LOADSTORE_INT_OP(_Tpvec, _Tp) \ +inline _Tpvec v_load(const _Tp* ptr) \ +{ return _Tpvec(wasm_v128_load(ptr)); } \ +inline _Tpvec v_load_aligned(const _Tp* ptr) \ +{ return _Tpvec(wasm_v128_load(ptr)); } \ +inline _Tpvec v_load_low(const _Tp* ptr) \ +{ \ + _Tp tmp[_Tpvec::nlanes] = {0}; \ + for (int i=0; i<_Tpvec::nlanes/2; ++i) { \ + tmp[i] = ptr[i]; \ + } \ + return _Tpvec(wasm_v128_load(tmp)); \ +} \ +inline _Tpvec v_load_halves(const _Tp* ptr0, const _Tp* ptr1) \ +{ \ + _Tp tmp[_Tpvec::nlanes]; \ + for (int i=0; i<_Tpvec::nlanes/2; ++i) { \ + tmp[i] = ptr0[i]; \ + tmp[i+_Tpvec::nlanes/2] = ptr1[i]; \ + } \ + return _Tpvec(wasm_v128_load(tmp)); \ +} \ +inline void v_store(_Tp* ptr, const _Tpvec& a) \ +{ wasm_v128_store(ptr, a.val); } \ +inline void v_store_aligned(_Tp* ptr, const _Tpvec& a) \ +{ wasm_v128_store(ptr, a.val); } \ +inline void v_store_aligned_nocache(_Tp* ptr, const _Tpvec& a) \ +{ wasm_v128_store(ptr, a.val); } \ +inline void v_store(_Tp* ptr, const _Tpvec& a, hal::StoreMode /*mode*/) \ +{ \ + wasm_v128_store(ptr, a.val); \ +} \ +inline void v_store_low(_Tp* ptr, const _Tpvec& a) \ +{ \ + fallback::_Tpvec a_(a); \ + fallback::v_store_low(ptr, a_); \ +} \ +inline void v_store_high(_Tp* ptr, const _Tpvec& a) \ +{ \ + fallback::_Tpvec a_(a); \ + fallback::v_store_high(ptr, a_); \ +} + +OPENCV_HAL_IMPL_WASM_LOADSTORE_INT_OP(v_uint8x16, uchar) +OPENCV_HAL_IMPL_WASM_LOADSTORE_INT_OP(v_int8x16, schar) +OPENCV_HAL_IMPL_WASM_LOADSTORE_INT_OP(v_uint16x8, ushort) +OPENCV_HAL_IMPL_WASM_LOADSTORE_INT_OP(v_int16x8, short) +OPENCV_HAL_IMPL_WASM_LOADSTORE_INT_OP(v_uint32x4, unsigned) +OPENCV_HAL_IMPL_WASM_LOADSTORE_INT_OP(v_int32x4, int) +OPENCV_HAL_IMPL_WASM_LOADSTORE_INT_OP(v_uint64x2, uint64) +OPENCV_HAL_IMPL_WASM_LOADSTORE_INT_OP(v_int64x2, int64) +OPENCV_HAL_IMPL_WASM_LOADSTORE_INT_OP(v_float32x4, float) +OPENCV_HAL_IMPL_WASM_LOADSTORE_INT_OP(v_float64x2, double) + + +/** Reverse **/ +inline v_uint8x16 v_reverse(const v_uint8x16 &a) +{ return v_uint8x16(wasm_v8x16_shuffle(a.val, a.val, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)); } + +inline v_int8x16 v_reverse(const v_int8x16 &a) +{ return v_reinterpret_as_s8(v_reverse(v_reinterpret_as_u8(a))); } + +inline v_uint16x8 v_reverse(const v_uint16x8 &a) +{ return v_uint16x8(wasm_v8x16_shuffle(a.val, a.val, 14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1)); } + +inline v_int16x8 v_reverse(const v_int16x8 &a) +{ return v_reinterpret_as_s16(v_reverse(v_reinterpret_as_u16(a))); } + +inline v_uint32x4 v_reverse(const v_uint32x4 &a) +{ return v_uint32x4(wasm_v8x16_shuffle(a.val, a.val, 12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3)); } + +inline v_int32x4 v_reverse(const v_int32x4 &a) +{ return v_reinterpret_as_s32(v_reverse(v_reinterpret_as_u32(a))); } + +inline v_float32x4 v_reverse(const v_float32x4 &a) +{ return v_reinterpret_as_f32(v_reverse(v_reinterpret_as_u32(a))); } + +inline v_uint64x2 v_reverse(const v_uint64x2 &a) +{ return v_uint64x2(wasm_v8x16_shuffle(a.val, a.val, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7)); } + +inline v_int64x2 v_reverse(const v_int64x2 &a) +{ return v_reinterpret_as_s64(v_reverse(v_reinterpret_as_u64(a))); } + +inline v_float64x2 v_reverse(const v_float64x2 &a) +{ return v_reinterpret_as_f64(v_reverse(v_reinterpret_as_u64(a))); } + + +#define OPENCV_HAL_IMPL_WASM_REDUCE_OP_4_SUM(_Tpvec, scalartype, regtype, suffix, esuffix) \ +inline scalartype v_reduce_sum(const _Tpvec& a) \ +{ \ + regtype val = a.val; \ + val = wasm_##suffix##_add(val, wasm_v8x16_shuffle(val, val, 8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7)); \ + val = wasm_##suffix##_add(val, wasm_v8x16_shuffle(val, val, 4,5,6,7,8,9,10,11,12,13,14,15,0,1,2,3)); \ + return (scalartype)wasm_##esuffix##_extract_lane(val, 0); \ +} + +OPENCV_HAL_IMPL_WASM_REDUCE_OP_4_SUM(v_uint32x4, unsigned, v128_t, i32x4, i32x4) +OPENCV_HAL_IMPL_WASM_REDUCE_OP_4_SUM(v_int32x4, int, v128_t, i32x4, i32x4) +OPENCV_HAL_IMPL_WASM_REDUCE_OP_4_SUM(v_float32x4, float, v128_t, f32x4, f32x4) + +// To do: Optimize v_reduce_sum with wasm intrin. +// Now use fallback implementation as there is no widening op in wasm intrin. + +#define OPENCV_HAL_IMPL_FALLBACK_REDUCE_OP_SUM(_Tpvec, scalartype) \ +inline scalartype v_reduce_sum(const _Tpvec& a) \ +{ \ + fallback::_Tpvec a_(a); \ + return fallback::v_reduce_sum(a_); \ +} + +OPENCV_HAL_IMPL_FALLBACK_REDUCE_OP_SUM(v_uint8x16, unsigned) +OPENCV_HAL_IMPL_FALLBACK_REDUCE_OP_SUM(v_int8x16, int) +OPENCV_HAL_IMPL_FALLBACK_REDUCE_OP_SUM(v_uint16x8, unsigned) +OPENCV_HAL_IMPL_FALLBACK_REDUCE_OP_SUM(v_int16x8, int) + + +#define OPENCV_HAL_IMPL_WASM_REDUCE_OP_2_SUM(_Tpvec, scalartype, regtype, suffix, esuffix) \ +inline scalartype v_reduce_sum(const _Tpvec& a) \ +{ \ + regtype val = a.val; \ + val = wasm_##suffix##_add(val, wasm_v8x16_shuffle(val, val, 8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7)); \ + return (scalartype)wasm_##esuffix##_extract_lane(val, 0); \ +} +OPENCV_HAL_IMPL_WASM_REDUCE_OP_2_SUM(v_uint64x2, uint64, v128_t, i64x2, i64x2) +OPENCV_HAL_IMPL_WASM_REDUCE_OP_2_SUM(v_int64x2, int64, v128_t, i64x2, i64x2) +OPENCV_HAL_IMPL_WASM_REDUCE_OP_2_SUM(v_float64x2, double, v128_t, f64x2,f64x2) + +inline v_float32x4 v_reduce_sum4(const v_float32x4& a, const v_float32x4& b, + const v_float32x4& c, const v_float32x4& d) +{ + v128_t ac = wasm_f32x4_add(wasm_unpacklo_i32x4(a.val, c.val), wasm_unpackhi_i32x4(a.val, c.val)); + v128_t bd = wasm_f32x4_add(wasm_unpacklo_i32x4(b.val, d.val), wasm_unpackhi_i32x4(b.val, d.val)); + return v_float32x4(wasm_f32x4_add(wasm_unpacklo_i32x4(ac, bd), wasm_unpackhi_i32x4(ac, bd))); +} + +#define OPENCV_HAL_IMPL_WASM_REDUCE_OP(_Tpvec, scalartype, func, scalar_func) \ +inline scalartype v_reduce_##func(const _Tpvec& a) \ +{ \ + scalartype buf[_Tpvec::nlanes]; \ + v_store(buf, a); \ + scalartype tmp = buf[0]; \ + for (int i=1; i<_Tpvec::nlanes; ++i) { \ + tmp = scalar_func(tmp, buf[i]); \ + } \ + return tmp; \ +} + +OPENCV_HAL_IMPL_WASM_REDUCE_OP(v_uint8x16, uchar, max, std::max) +OPENCV_HAL_IMPL_WASM_REDUCE_OP(v_uint8x16, uchar, min, std::min) +OPENCV_HAL_IMPL_WASM_REDUCE_OP(v_int8x16, schar, max, std::max) +OPENCV_HAL_IMPL_WASM_REDUCE_OP(v_int8x16, schar, min, std::min) +OPENCV_HAL_IMPL_WASM_REDUCE_OP(v_uint16x8, ushort, max, std::max) +OPENCV_HAL_IMPL_WASM_REDUCE_OP(v_uint16x8, ushort, min, std::min) +OPENCV_HAL_IMPL_WASM_REDUCE_OP(v_int16x8, short, max, std::max) +OPENCV_HAL_IMPL_WASM_REDUCE_OP(v_int16x8, short, min, std::min) +OPENCV_HAL_IMPL_WASM_REDUCE_OP(v_uint32x4, unsigned, max, std::max) +OPENCV_HAL_IMPL_WASM_REDUCE_OP(v_uint32x4, unsigned, min, std::min) +OPENCV_HAL_IMPL_WASM_REDUCE_OP(v_int32x4, int, max, std::max) +OPENCV_HAL_IMPL_WASM_REDUCE_OP(v_int32x4, int, min, std::min) +OPENCV_HAL_IMPL_WASM_REDUCE_OP(v_float32x4, float, max, std::max) +OPENCV_HAL_IMPL_WASM_REDUCE_OP(v_float32x4, float, min, std::min) + +inline unsigned v_reduce_sad(const v_uint8x16& a, const v_uint8x16& b) +{ + v_uint16x8 l16, h16; + v_uint32x4 l16_l32, l16_h32, h16_l32, h16_h32; + v_expand(v_absdiff(a, b), l16, h16); + v_expand(l16, l16_l32, l16_h32); + v_expand(h16, h16_l32, h16_h32); + return v_reduce_sum(l16_l32+l16_h32+h16_l32+h16_h32); +} +inline unsigned v_reduce_sad(const v_int8x16& a, const v_int8x16& b) +{ + v_uint16x8 l16, h16; + v_uint32x4 l16_l32, l16_h32, h16_l32, h16_h32; + v_expand(v_absdiff(a, b), l16, h16); + v_expand(l16, l16_l32, l16_h32); + v_expand(h16, h16_l32, h16_h32); + return v_reduce_sum(l16_l32+l16_h32+h16_l32+h16_h32); +} +inline unsigned v_reduce_sad(const v_uint16x8& a, const v_uint16x8& b) +{ + v_uint32x4 l, h; + v_expand(v_absdiff(a, b), l, h); + return v_reduce_sum(l + h); +} +inline unsigned v_reduce_sad(const v_int16x8& a, const v_int16x8& b) +{ + v_uint32x4 l, h; + v_expand(v_absdiff(a, b), l, h); + return v_reduce_sum(l + h); +} +inline unsigned v_reduce_sad(const v_uint32x4& a, const v_uint32x4& b) +{ + return v_reduce_sum(v_absdiff(a, b)); +} +inline unsigned v_reduce_sad(const v_int32x4& a, const v_int32x4& b) +{ + return v_reduce_sum(v_absdiff(a, b)); +} +inline float v_reduce_sad(const v_float32x4& a, const v_float32x4& b) +{ + return v_reduce_sum(v_absdiff(a, b)); +} + +inline v_uint8x16 v_popcount(const v_uint8x16& a) +{ + v128_t m1 = wasm_i32x4_splat(0x55555555); + v128_t m2 = wasm_i32x4_splat(0x33333333); + v128_t m4 = wasm_i32x4_splat(0x0f0f0f0f); + v128_t p = a.val; + p = wasm_i32x4_add(wasm_v128_and(wasm_u32x4_shr(p, 1), m1), wasm_v128_and(p, m1)); + p = wasm_i32x4_add(wasm_v128_and(wasm_u32x4_shr(p, 2), m2), wasm_v128_and(p, m2)); + p = wasm_i32x4_add(wasm_v128_and(wasm_u32x4_shr(p, 4), m4), wasm_v128_and(p, m4)); + return v_uint8x16(p); +} +inline v_uint16x8 v_popcount(const v_uint16x8& a) +{ + v_uint8x16 p = v_popcount(v_reinterpret_as_u8(a)); + p += v_rotate_right<1>(p); + return v_reinterpret_as_u16(p) & v_setall_u16(0x00ff); +} +inline v_uint32x4 v_popcount(const v_uint32x4& a) +{ + v_uint8x16 p = v_popcount(v_reinterpret_as_u8(a)); + p += v_rotate_right<1>(p); + p += v_rotate_right<2>(p); + return v_reinterpret_as_u32(p) & v_setall_u32(0x000000ff); +} +inline v_uint64x2 v_popcount(const v_uint64x2& a) +{ + fallback::v_uint64x2 a_(a); + return fallback::v_popcount(a_); +} +inline v_uint8x16 v_popcount(const v_int8x16& a) +{ return v_popcount(v_reinterpret_as_u8(a)); } +inline v_uint16x8 v_popcount(const v_int16x8& a) +{ return v_popcount(v_reinterpret_as_u16(a)); } +inline v_uint32x4 v_popcount(const v_int32x4& a) +{ return v_popcount(v_reinterpret_as_u32(a)); } +inline v_uint64x2 v_popcount(const v_int64x2& a) +{ return v_popcount(v_reinterpret_as_u64(a)); } + +#define OPENCV_HAL_IMPL_WASM_CHECK_SIGNS(_Tpvec, suffix, scalarType) \ +inline int v_signmask(const _Tpvec& a) \ +{ \ + fallback::_Tpvec a_(a); \ + return fallback::v_signmask(a_); \ +} \ +inline bool v_check_all(const _Tpvec& a) \ +{ return wasm_i8x16_all_true(wasm_##suffix##_lt(a.val, wasm_##suffix##_splat(0))); } \ +inline bool v_check_any(const _Tpvec& a) \ +{ return wasm_i8x16_any_true(wasm_##suffix##_lt(a.val, wasm_##suffix##_splat(0)));; } + +OPENCV_HAL_IMPL_WASM_CHECK_SIGNS(v_uint8x16, i8x16, schar) +OPENCV_HAL_IMPL_WASM_CHECK_SIGNS(v_int8x16, i8x16, schar) +OPENCV_HAL_IMPL_WASM_CHECK_SIGNS(v_uint16x8, i16x8, short) +OPENCV_HAL_IMPL_WASM_CHECK_SIGNS(v_int16x8, i16x8, short) +OPENCV_HAL_IMPL_WASM_CHECK_SIGNS(v_uint32x4, i32x4, int) +OPENCV_HAL_IMPL_WASM_CHECK_SIGNS(v_int32x4, i32x4, int) +OPENCV_HAL_IMPL_WASM_CHECK_SIGNS(v_float32x4, i32x4, float) +OPENCV_HAL_IMPL_WASM_CHECK_SIGNS(v_float64x2, f64x2, double) + +#define OPENCV_HAL_IMPL_WASM_CHECK_ALL_ANY(_Tpvec, suffix, esuffix) \ +inline bool v_check_all(const _Tpvec& a) \ +{ \ + v128_t masked = v_reinterpret_as_##esuffix(a).val; \ + masked = wasm_i32x4_replace_lane(masked, 0, 0xffffffff); \ + masked = wasm_i32x4_replace_lane(masked, 2, 0xffffffff); \ + return wasm_i8x16_all_true(wasm_##suffix##_lt(masked, wasm_##suffix##_splat(0))); \ +} \ +inline bool v_check_any(const _Tpvec& a) \ +{ \ + v128_t masked = v_reinterpret_as_##esuffix(a).val; \ + masked = wasm_i32x4_replace_lane(masked, 0, 0x0); \ + masked = wasm_i32x4_replace_lane(masked, 2, 0x0); \ + return wasm_i8x16_any_true(wasm_##suffix##_lt(masked, wasm_##suffix##_splat(0))); \ +} \ + +OPENCV_HAL_IMPL_WASM_CHECK_ALL_ANY(v_int64x2, i32x4, s32) +OPENCV_HAL_IMPL_WASM_CHECK_ALL_ANY(v_uint64x2, i32x4, u32) + + +inline int v_scan_forward(const v_int8x16& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))); } +inline int v_scan_forward(const v_uint8x16& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))); } +inline int v_scan_forward(const v_int16x8& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))) / 2; } +inline int v_scan_forward(const v_uint16x8& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))) / 2; } +inline int v_scan_forward(const v_int32x4& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))) / 4; } +inline int v_scan_forward(const v_uint32x4& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))) / 4; } +inline int v_scan_forward(const v_float32x4& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))) / 4; } +inline int v_scan_forward(const v_int64x2& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))) / 8; } +inline int v_scan_forward(const v_uint64x2& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))) / 8; } +inline int v_scan_forward(const v_float64x2& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))) / 8; } + +#define OPENCV_HAL_IMPL_WASM_SELECT(_Tpvec) \ +inline _Tpvec v_select(const _Tpvec& mask, const _Tpvec& a, const _Tpvec& b) \ +{ \ + return _Tpvec(wasm_v128_bitselect(a.val, b.val, mask.val)); \ +} + +OPENCV_HAL_IMPL_WASM_SELECT(v_uint8x16) +OPENCV_HAL_IMPL_WASM_SELECT(v_int8x16) +OPENCV_HAL_IMPL_WASM_SELECT(v_uint16x8) +OPENCV_HAL_IMPL_WASM_SELECT(v_int16x8) +OPENCV_HAL_IMPL_WASM_SELECT(v_uint32x4) +OPENCV_HAL_IMPL_WASM_SELECT(v_int32x4) +OPENCV_HAL_IMPL_WASM_SELECT(v_uint64x2) +OPENCV_HAL_IMPL_WASM_SELECT(v_int64x2) +OPENCV_HAL_IMPL_WASM_SELECT(v_float32x4) +OPENCV_HAL_IMPL_WASM_SELECT(v_float64x2) + +#define OPENCV_HAL_IMPL_WASM_EXPAND(_Tpvec, _Tpwvec, _Tp, intrin) \ +inline void v_expand(const _Tpvec& a, _Tpwvec& b0, _Tpwvec& b1) \ +{ \ + b0.val = intrin(a.val); \ + b1.val = __CV_CAT(intrin, _high)(a.val); \ +} \ +inline _Tpwvec v_expand_low(const _Tpvec& a) \ +{ return _Tpwvec(intrin(a.val)); } \ +inline _Tpwvec v_expand_high(const _Tpvec& a) \ +{ return _Tpwvec(__CV_CAT(intrin, _high)(a.val)); } \ +inline _Tpwvec v_load_expand(const _Tp* ptr) \ +{ \ + v128_t a = wasm_v128_load(ptr); \ + return _Tpwvec(intrin(a)); \ +} + +OPENCV_HAL_IMPL_WASM_EXPAND(v_uint8x16, v_uint16x8, uchar, v128_cvtu8x16_i16x8) +OPENCV_HAL_IMPL_WASM_EXPAND(v_int8x16, v_int16x8, schar, v128_cvti8x16_i16x8) +OPENCV_HAL_IMPL_WASM_EXPAND(v_uint16x8, v_uint32x4, ushort, v128_cvtu16x8_i32x4) +OPENCV_HAL_IMPL_WASM_EXPAND(v_int16x8, v_int32x4, short, v128_cvti16x8_i32x4) +OPENCV_HAL_IMPL_WASM_EXPAND(v_uint32x4, v_uint64x2, unsigned, v128_cvtu32x4_i64x2) +OPENCV_HAL_IMPL_WASM_EXPAND(v_int32x4, v_int64x2, int, v128_cvti32x4_i64x2) + +#define OPENCV_HAL_IMPL_WASM_EXPAND_Q(_Tpvec, _Tp, intrin) \ +inline _Tpvec v_load_expand_q(const _Tp* ptr) \ +{ \ + v128_t a = wasm_v128_load(ptr); \ + return _Tpvec(intrin(a)); \ +} + +OPENCV_HAL_IMPL_WASM_EXPAND_Q(v_uint32x4, uchar, v128_cvtu8x16_i32x4) +OPENCV_HAL_IMPL_WASM_EXPAND_Q(v_int32x4, schar, v128_cvti8x16_i32x4) + +#define OPENCV_HAL_IMPL_WASM_UNPACKS(_Tpvec, suffix) \ +inline void v_zip(const _Tpvec& a0, const _Tpvec& a1, _Tpvec& b0, _Tpvec& b1) \ +{ \ + b0.val = wasm_unpacklo_##suffix(a0.val, a1.val); \ + b1.val = wasm_unpackhi_##suffix(a0.val, a1.val); \ +} \ +inline _Tpvec v_combine_low(const _Tpvec& a, const _Tpvec& b) \ +{ \ + return _Tpvec(wasm_unpacklo_i64x2(a.val, b.val)); \ +} \ +inline _Tpvec v_combine_high(const _Tpvec& a, const _Tpvec& b) \ +{ \ + return _Tpvec(wasm_unpackhi_i64x2(a.val, b.val)); \ +} \ +inline void v_recombine(const _Tpvec& a, const _Tpvec& b, _Tpvec& c, _Tpvec& d) \ +{ \ + c.val = wasm_unpacklo_i64x2(a.val, b.val); \ + d.val = wasm_unpackhi_i64x2(a.val, b.val); \ +} + +OPENCV_HAL_IMPL_WASM_UNPACKS(v_uint8x16, i8x16) +OPENCV_HAL_IMPL_WASM_UNPACKS(v_int8x16, i8x16) +OPENCV_HAL_IMPL_WASM_UNPACKS(v_uint16x8, i16x8) +OPENCV_HAL_IMPL_WASM_UNPACKS(v_int16x8, i16x8) +OPENCV_HAL_IMPL_WASM_UNPACKS(v_uint32x4, i32x4) +OPENCV_HAL_IMPL_WASM_UNPACKS(v_int32x4, i32x4) +OPENCV_HAL_IMPL_WASM_UNPACKS(v_float32x4, i32x4) +OPENCV_HAL_IMPL_WASM_UNPACKS(v_float64x2, i64x2) + +template +inline _Tpvec v_extract(const _Tpvec& a, const _Tpvec& b) +{ + return v_rotate_right(a, b); +} + +inline v_int32x4 v_round(const v_float32x4& a) +{ + v128_t h = wasm_f32x4_splat(0.5); + return v_int32x4(wasm_i32x4_trunc_saturate_f32x4(wasm_f32x4_add(a.val, h))); +} + +inline v_int32x4 v_floor(const v_float32x4& a) +{ + v128_t a1 = wasm_i32x4_trunc_saturate_f32x4(a.val); + v128_t mask = wasm_f32x4_lt(a.val, wasm_f32x4_convert_i32x4(a1)); + return v_int32x4(wasm_i32x4_add(a1, mask)); +} + +inline v_int32x4 v_ceil(const v_float32x4& a) +{ + v128_t a1 = wasm_i32x4_trunc_saturate_f32x4(a.val); + v128_t mask = wasm_f32x4_gt(a.val, wasm_f32x4_convert_i32x4(a1)); + return v_int32x4(wasm_i32x4_sub(a1, mask)); +} + +inline v_int32x4 v_trunc(const v_float32x4& a) +{ return v_int32x4(wasm_i32x4_trunc_saturate_f32x4(a.val)); } + +#define OPENCV_HAL_IMPL_WASM_MATH_FUNC(func, cfunc, _Tpvec, _Tpnvec, _Tp, _Tpn) \ +inline _Tpnvec func(const _Tpvec& a) \ +{ \ + fallback::_Tpvec a_(a); \ + return fallback::func(a_); \ +} + +OPENCV_HAL_IMPL_WASM_MATH_FUNC(v_round, cvRound, v_float64x2, v_int32x4, double, int) +OPENCV_HAL_IMPL_WASM_MATH_FUNC(v_floor, cvFloor, v_float64x2, v_int32x4, double, int) +OPENCV_HAL_IMPL_WASM_MATH_FUNC(v_ceil, cvCeil, v_float64x2, v_int32x4, double, int) +OPENCV_HAL_IMPL_WASM_MATH_FUNC(v_trunc, int, v_float64x2, v_int32x4, double, int) + +inline v_int32x4 v_round(const v_float64x2& a, const v_float64x2& b) +{ + fallback::v_float64x2 a_(a), b_(b); + return fallback::v_round(a_, b_); +} + +#define OPENCV_HAL_IMPL_WASM_TRANSPOSE4x4(_Tpvec, suffix) \ +inline void v_transpose4x4(const _Tpvec& a0, const _Tpvec& a1, \ + const _Tpvec& a2, const _Tpvec& a3, \ + _Tpvec& b0, _Tpvec& b1, \ + _Tpvec& b2, _Tpvec& b3) \ +{ \ + v128_t t0 = wasm_unpacklo_##suffix(a0.val, a1.val); \ + v128_t t1 = wasm_unpacklo_##suffix(a2.val, a3.val); \ + v128_t t2 = wasm_unpackhi_##suffix(a0.val, a1.val); \ + v128_t t3 = wasm_unpackhi_##suffix(a2.val, a3.val); \ +\ + b0.val = wasm_unpacklo_i64x2(t0, t1); \ + b1.val = wasm_unpackhi_i64x2(t0, t1); \ + b2.val = wasm_unpacklo_i64x2(t2, t3); \ + b3.val = wasm_unpackhi_i64x2(t2, t3); \ +} + +OPENCV_HAL_IMPL_WASM_TRANSPOSE4x4(v_uint32x4, i32x4) +OPENCV_HAL_IMPL_WASM_TRANSPOSE4x4(v_int32x4, i32x4) +OPENCV_HAL_IMPL_WASM_TRANSPOSE4x4(v_float32x4, i32x4) + +// load deinterleave +inline void v_load_deinterleave(const uchar* ptr, v_uint8x16& a, v_uint8x16& b) +{ + v128_t t00 = wasm_v128_load(ptr); + v128_t t01 = wasm_v128_load(ptr + 16); + + a.val = wasm_v8x16_shuffle(t00, t01, 0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30); + b.val = wasm_v8x16_shuffle(t00, t01, 1,3,5,7,9,11,13,15,17,19,21,23,25,27,29,31); +} + +inline void v_load_deinterleave(const uchar* ptr, v_uint8x16& a, v_uint8x16& b, v_uint8x16& c) +{ + v128_t t00 = wasm_v128_load(ptr); + v128_t t01 = wasm_v128_load(ptr + 16); + v128_t t02 = wasm_v128_load(ptr + 32); + + v128_t t10 = wasm_v8x16_shuffle(t00, t01, 0,3,6,9,12,15,18,21,24,27,30,1,2,4,5,7); + v128_t t11 = wasm_v8x16_shuffle(t00, t01, 1,4,7,10,13,16,19,22,25,28,31,0,2,3,5,6); + v128_t t12 = wasm_v8x16_shuffle(t00, t01, 2,5,8,11,14,17,20,23,26,29,0,1,3,4,6,7); + + a.val = wasm_v8x16_shuffle(t10, t02, 0,1,2,3,4,5,6,7,8,9,10,17,20,23,26,29); + b.val = wasm_v8x16_shuffle(t11, t02, 0,1,2,3,4,5,6,7,8,9,10,18,21,24,27,30); + c.val = wasm_v8x16_shuffle(t12, t02, 0,1,2,3,4,5,6,7,8,9,16,19,22,25,28,31); +} + +inline void v_load_deinterleave(const uchar* ptr, v_uint8x16& a, v_uint8x16& b, v_uint8x16& c, v_uint8x16& d) +{ + v128_t u0 = wasm_v128_load(ptr); // a0 b0 c0 d0 a1 b1 c1 d1 ... + v128_t u1 = wasm_v128_load(ptr + 16); // a4 b4 c4 d4 ... + v128_t u2 = wasm_v128_load(ptr + 32); // a8 b8 c8 d8 ... + v128_t u3 = wasm_v128_load(ptr + 48); // a12 b12 c12 d12 ... + + v128_t v0 = wasm_v8x16_shuffle(u0, u1, 0,4,8,12,16,20,24,28,1,5,9,13,17,21,25,29); + v128_t v1 = wasm_v8x16_shuffle(u2, u3, 0,4,8,12,16,20,24,28,1,5,9,13,17,21,25,29); + v128_t v2 = wasm_v8x16_shuffle(u0, u1, 2,6,10,14,18,22,26,30,3,7,11,15,19,23,27,31); + v128_t v3 = wasm_v8x16_shuffle(u2, u3, 2,6,10,14,18,22,26,30,3,7,11,15,19,23,27,31); + + a.val = wasm_v8x16_shuffle(v0, v1, 0,1,2,3,4,5,6,7,16,17,18,19,20,21,22,23); + b.val = wasm_v8x16_shuffle(v0, v1, 8,9,10,11,12,13,14,15,24,25,26,27,28,29,30,31); + c.val = wasm_v8x16_shuffle(v2, v3, 0,1,2,3,4,5,6,7,16,17,18,19,20,21,22,23); + d.val = wasm_v8x16_shuffle(v2, v3, 8,9,10,11,12,13,14,15,24,25,26,27,28,29,30,31); +} + +inline void v_load_deinterleave(const ushort* ptr, v_uint16x8& a, v_uint16x8& b) +{ + v128_t v0 = wasm_v128_load(ptr); // a0 b0 a1 b1 a2 b2 a3 b3 + v128_t v1 = wasm_v128_load(ptr + 8); // a4 b4 a5 b5 a6 b6 a7 b7 + + a.val = wasm_v8x16_shuffle(v0, v1, 0,1,4,5,8,9,12,13,16,17,20,21,24,25,28,29); // a0 a1 a2 a3 a4 a5 a6 a7 + b.val = wasm_v8x16_shuffle(v0, v1, 2,3,6,7,10,11,14,15,18,19,22,23,26,27,30,31); // b0 b1 ab b3 b4 b5 b6 b7 +} + +inline void v_load_deinterleave(const ushort* ptr, v_uint16x8& a, v_uint16x8& b, v_uint16x8& c) +{ + v128_t t00 = wasm_v128_load(ptr); // a0 b0 c0 a1 b1 c1 a2 b2 + v128_t t01 = wasm_v128_load(ptr + 8); // c2 a3 b3 c3 a4 b4 c4 a5 + v128_t t02 = wasm_v128_load(ptr + 16); // b5 c5 a6 b6 c6 a7 b7 c7 + + v128_t t10 = wasm_v8x16_shuffle(t00, t01, 0,1,6,7,12,13,18,19,24,25,30,31,2,3,4,5); + v128_t t11 = wasm_v8x16_shuffle(t00, t01, 2,3,8,9,14,15,20,21,26,27,0,1,4,5,6,7); + v128_t t12 = wasm_v8x16_shuffle(t00, t01, 4,5,10,11,16,17,22,23,28,29,0,1,2,3,6,7); + + a.val = wasm_v8x16_shuffle(t10, t02, 0,1,2,3,4,5,6,7,8,9,10,11,20,21,26,27); + b.val = wasm_v8x16_shuffle(t11, t02, 0,1,2,3,4,5,6,7,8,9,16,17,22,23,28,29); + c.val = wasm_v8x16_shuffle(t12, t02, 0,1,2,3,4,5,6,7,8,9,18,19,24,25,30,31); +} + +inline void v_load_deinterleave(const ushort* ptr, v_uint16x8& a, v_uint16x8& b, v_uint16x8& c, v_uint16x8& d) +{ + v128_t u0 = wasm_v128_load(ptr); // a0 b0 c0 d0 a1 b1 c1 d1 + v128_t u1 = wasm_v128_load(ptr + 8); // a2 b2 c2 d2 ... + v128_t u2 = wasm_v128_load(ptr + 16); // a4 b4 c4 d4 ... + v128_t u3 = wasm_v128_load(ptr + 24); // a6 b6 c6 d6 ... + + v128_t v0 = wasm_v8x16_shuffle(u0, u1, 0,1,8,9,16,17,24,25,2,3,10,11,18,19,26,27); // a0 a1 a2 a3 b0 b1 b2 b3 + v128_t v1 = wasm_v8x16_shuffle(u2, u3, 0,1,8,9,16,17,24,25,2,3,10,11,18,19,26,27); // a4 a5 a6 a7 b4 b5 b6 b7 + v128_t v2 = wasm_v8x16_shuffle(u0, u1, 4,5,12,13,20,21,28,29,6,7,14,15,22,23,30,31); // c0 c1 c2 c3 d0 d1 d2 d3 + v128_t v3 = wasm_v8x16_shuffle(u2, u3, 4,5,12,13,20,21,28,29,6,7,14,15,22,23,30,31); // c4 c5 c6 c7 d4 d5 d6 d7 + + a.val = wasm_v8x16_shuffle(v0, v1, 0,1,2,3,4,5,6,7,16,17,18,19,20,21,22,23); + b.val = wasm_v8x16_shuffle(v0, v1, 8,9,10,11,12,13,14,15,24,25,26,27,28,29,30,31); + c.val = wasm_v8x16_shuffle(v2, v3, 0,1,2,3,4,5,6,7,16,17,18,19,20,21,22,23); + d.val = wasm_v8x16_shuffle(v2, v3, 8,9,10,11,12,13,14,15,24,25,26,27,28,29,30,31); +} + +inline void v_load_deinterleave(const unsigned* ptr, v_uint32x4& a, v_uint32x4& b) +{ + v128_t v0 = wasm_v128_load(ptr); // a0 b0 a1 b1 + v128_t v1 = wasm_v128_load(ptr + 4); // a2 b2 a3 b3 + + a.val = wasm_v8x16_shuffle(v0, v1, 0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27); // a0 a1 a2 a3 + b.val = wasm_v8x16_shuffle(v0, v1, 4,5,6,7,12,13,14,15,20,21,22,23,28,29,30,31); // b0 b1 b2 b3 +} + +inline void v_load_deinterleave(const unsigned* ptr, v_uint32x4& a, v_uint32x4& b, v_uint32x4& c) +{ + v128_t t00 = wasm_v128_load(ptr); // a0 b0 c0 a1 + v128_t t01 = wasm_v128_load(ptr + 4); // b2 c2 a3 b3 + v128_t t02 = wasm_v128_load(ptr + 8); // c3 a4 b4 c4 + + v128_t t10 = wasm_v8x16_shuffle(t00, t01, 0,1,2,3,12,13,14,15,24,25,26,27,4,5,6,7); + v128_t t11 = wasm_v8x16_shuffle(t00, t01, 4,5,6,7,16,17,18,19,28,29,30,31,0,1,2,3); + v128_t t12 = wasm_v8x16_shuffle(t00, t01, 8,9,10,11,20,21,22,23,0,1,2,3,4,5,6,7); + + a.val = wasm_v8x16_shuffle(t10, t02, 0,1,2,3,4,5,6,7,8,9,10,11,20,21,22,23); + b.val = wasm_v8x16_shuffle(t11, t02, 0,1,2,3,4,5,6,7,8,9,10,11,24,25,26,27); + c.val = wasm_v8x16_shuffle(t12, t02, 0,1,2,3,4,5,6,7,16,17,18,19,28,29,30,31); +} + +inline void v_load_deinterleave(const unsigned* ptr, v_uint32x4& a, v_uint32x4& b, v_uint32x4& c, v_uint32x4& d) +{ + v_uint32x4 s0(wasm_v128_load(ptr)); // a0 b0 c0 d0 + v_uint32x4 s1(wasm_v128_load(ptr + 4)); // a1 b1 c1 d1 + v_uint32x4 s2(wasm_v128_load(ptr + 8)); // a2 b2 c2 d2 + v_uint32x4 s3(wasm_v128_load(ptr + 12)); // a3 b3 c3 d3 + + v_transpose4x4(s0, s1, s2, s3, a, b, c, d); +} + +inline void v_load_deinterleave(const float* ptr, v_float32x4& a, v_float32x4& b) +{ + v128_t v0 = wasm_v128_load(ptr); // a0 b0 a1 b1 + v128_t v1 = wasm_v128_load((ptr + 4)); // a2 b2 a3 b3 + + a.val = wasm_v8x16_shuffle(v0, v1, 0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27); // a0 a1 a2 a3 + b.val = wasm_v8x16_shuffle(v0, v1, 4,5,6,7,12,13,14,15,20,21,22,23,28,29,30,31); // b0 b1 b2 b3 +} + +inline void v_load_deinterleave(const float* ptr, v_float32x4& a, v_float32x4& b, v_float32x4& c) +{ + v128_t t00 = wasm_v128_load(ptr); // a0 b0 c0 a1 + v128_t t01 = wasm_v128_load(ptr + 4); // b2 c2 a3 b3 + v128_t t02 = wasm_v128_load(ptr + 8); // c3 a4 b4 c4 + + v128_t t10 = wasm_v8x16_shuffle(t00, t01, 0,1,2,3,12,13,14,15,24,25,26,27,4,5,6,7); + v128_t t11 = wasm_v8x16_shuffle(t00, t01, 4,5,6,7,16,17,18,19,28,29,30,31,0,1,2,3); + v128_t t12 = wasm_v8x16_shuffle(t00, t01, 8,9,10,11,20,21,22,23,0,1,2,3,4,5,6,7); + + a.val = wasm_v8x16_shuffle(t10, t02, 0,1,2,3,4,5,6,7,8,9,10,11,20,21,22,23); + b.val = wasm_v8x16_shuffle(t11, t02, 0,1,2,3,4,5,6,7,8,9,10,11,24,25,26,27); + c.val = wasm_v8x16_shuffle(t12, t02, 0,1,2,3,4,5,6,7,16,17,18,19,28,29,30,31); +} + +inline void v_load_deinterleave(const float* ptr, v_float32x4& a, v_float32x4& b, v_float32x4& c, v_float32x4& d) +{ + v_float32x4 s0(wasm_v128_load(ptr)); // a0 b0 c0 d0 + v_float32x4 s1(wasm_v128_load(ptr + 4)); // a1 b1 c1 d1 + v_float32x4 s2(wasm_v128_load(ptr + 8)); // a2 b2 c2 d2 + v_float32x4 s3(wasm_v128_load(ptr + 12)); // a3 b3 c3 d3 + + v_transpose4x4(s0, s1, s2, s3, a, b, c, d); +} + +inline void v_load_deinterleave(const uint64 *ptr, v_uint64x2& a, v_uint64x2& b) +{ + v128_t t0 = wasm_v128_load(ptr); // a0 b0 + v128_t t1 = wasm_v128_load(ptr + 2); // a1 b1 + + a.val = wasm_unpacklo_i64x2(t0, t1); + b.val = wasm_unpackhi_i64x2(t0, t1); +} + +inline void v_load_deinterleave(const uint64 *ptr, v_uint64x2& a, v_uint64x2& b, v_uint64x2& c) +{ + v128_t t0 = wasm_v128_load(ptr); // a0, b0 + v128_t t1 = wasm_v128_load(ptr + 2); // c0, a1 + v128_t t2 = wasm_v128_load(ptr + 4); // b1, c1 + + a.val = wasm_v8x16_shuffle(t0, t1, 0,1,2,3,4,5,6,7,24,25,26,27,28,29,30,31); + b.val = wasm_v8x16_shuffle(t0, t2, 8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23); + c.val = wasm_v8x16_shuffle(t1, t2, 0,1,2,3,4,5,6,7,24,25,26,27,28,29,30,31); +} + +inline void v_load_deinterleave(const uint64 *ptr, v_uint64x2& a, + v_uint64x2& b, v_uint64x2& c, v_uint64x2& d) +{ + v128_t t0 = wasm_v128_load(ptr); // a0 b0 + v128_t t1 = wasm_v128_load(ptr + 2); // c0 d0 + v128_t t2 = wasm_v128_load(ptr + 4); // a1 b1 + v128_t t3 = wasm_v128_load(ptr + 6); // c1 d1 + + a.val = wasm_unpacklo_i64x2(t0, t2); + b.val = wasm_unpackhi_i64x2(t0, t2); + c.val = wasm_unpacklo_i64x2(t1, t3); + d.val = wasm_unpackhi_i64x2(t1, t3); +} + +// store interleave + +inline void v_store_interleave( uchar* ptr, const v_uint8x16& a, const v_uint8x16& b, + hal::StoreMode /*mode*/ = hal::STORE_UNALIGNED) +{ + v128_t v0 = wasm_unpacklo_i8x16(a.val, b.val); + v128_t v1 = wasm_unpackhi_i8x16(a.val, b.val); + + wasm_v128_store(ptr, v0); + wasm_v128_store(ptr + 16, v1); +} + +inline void v_store_interleave( uchar* ptr, const v_uint8x16& a, const v_uint8x16& b, + const v_uint8x16& c, hal::StoreMode /*mode*/ = hal::STORE_UNALIGNED) +{ + v128_t t00 = wasm_v8x16_shuffle(a.val, b.val, 0,16,0,1,17,0,2,18,0,3,19,0,4,20,0,5); + v128_t t01 = wasm_v8x16_shuffle(a.val, b.val, 21,0,6,22,0,7,23,0,8,24,0,9,25,0,10,26); + v128_t t02 = wasm_v8x16_shuffle(a.val, b.val, 0,11,27,0,12,28,0,13,29,0,14,30,0,15,31,0); + + v128_t t10 = wasm_v8x16_shuffle(t00, c.val, 0,1,16,3,4,17,6,7,18,9,10,19,12,13,20,15); + v128_t t11 = wasm_v8x16_shuffle(t01, c.val, 0,21,2,3,22,5,6,23,8,9,24,11,12,25,14,15); + v128_t t12 = wasm_v8x16_shuffle(t02, c.val, 26,1,2,27,4,5,28,7,8,29,10,11,30,13,14,31); + + wasm_v128_store(ptr, t10); + wasm_v128_store(ptr + 16, t11); + wasm_v128_store(ptr + 32, t12); +} + +inline void v_store_interleave( uchar* ptr, const v_uint8x16& a, const v_uint8x16& b, + const v_uint8x16& c, const v_uint8x16& d, + hal::StoreMode /*mode*/ = hal::STORE_UNALIGNED) +{ + // a0 a1 a2 a3 .... + // b0 b1 b2 b3 .... + // c0 c1 c2 c3 .... + // d0 d1 d2 d3 .... + v128_t u0 = wasm_unpacklo_i8x16(a.val, c.val); // a0 c0 a1 c1 ... + v128_t u1 = wasm_unpackhi_i8x16(a.val, c.val); // a8 c8 a9 c9 ... + v128_t u2 = wasm_unpacklo_i8x16(b.val, d.val); // b0 d0 b1 d1 ... + v128_t u3 = wasm_unpackhi_i8x16(b.val, d.val); // b8 d8 b9 d9 ... + + v128_t v0 = wasm_unpacklo_i8x16(u0, u2); // a0 b0 c0 d0 ... + v128_t v1 = wasm_unpackhi_i8x16(u0, u2); // a4 b4 c4 d4 ... + v128_t v2 = wasm_unpacklo_i8x16(u1, u3); // a8 b8 c8 d8 ... + v128_t v3 = wasm_unpackhi_i8x16(u1, u3); // a12 b12 c12 d12 ... + + wasm_v128_store(ptr, v0); + wasm_v128_store(ptr + 16, v1); + wasm_v128_store(ptr + 32, v2); + wasm_v128_store(ptr + 48, v3); +} + +inline void v_store_interleave( ushort* ptr, const v_uint16x8& a, const v_uint16x8& b, + hal::StoreMode /*mode*/ = hal::STORE_UNALIGNED) +{ + v128_t v0 = wasm_unpacklo_i16x8(a.val, b.val); + v128_t v1 = wasm_unpackhi_i16x8(a.val, b.val); + + wasm_v128_store(ptr, v0); + wasm_v128_store(ptr + 8, v1); +} + +inline void v_store_interleave( ushort* ptr, const v_uint16x8& a, + const v_uint16x8& b, const v_uint16x8& c, + hal::StoreMode /*mode*/ = hal::STORE_UNALIGNED) +{ + v128_t t00 = wasm_v8x16_shuffle(a.val, b.val, 0,1,16,17,0,0,2,3,18,19,0,0,4,5,20,21); + v128_t t01 = wasm_v8x16_shuffle(a.val, b.val, 0,0,6,7,22,23,0,0,8,9,24,25,0,0,10,11); + v128_t t02 = wasm_v8x16_shuffle(a.val, b.val, 26,27,0,0,12,13,28,29,0,0,14,15,30,31,0,0); + + v128_t t10 = wasm_v8x16_shuffle(t00, c.val, 0,1,2,3,16,17,6,7,8,9,18,19,12,13,14,15); + v128_t t11 = wasm_v8x16_shuffle(t01, c.val, 20,21,2,3,4,5,22,23,8,9,10,11,24,25,14,15); + v128_t t12 = wasm_v8x16_shuffle(t02, c.val, 0,1,26,27,4,5,6,7,28,29,10,11,12,13,30,31); + + wasm_v128_store(ptr, t10); + wasm_v128_store(ptr + 8, t11); + wasm_v128_store(ptr + 16, t12); +} + +inline void v_store_interleave( ushort* ptr, const v_uint16x8& a, const v_uint16x8& b, + const v_uint16x8& c, const v_uint16x8& d, + hal::StoreMode /*mode*/ = hal::STORE_UNALIGNED) +{ + // a0 a1 a2 a3 .... + // b0 b1 b2 b3 .... + // c0 c1 c2 c3 .... + // d0 d1 d2 d3 .... + v128_t u0 = wasm_unpacklo_i16x8(a.val, c.val); // a0 c0 a1 c1 ... + v128_t u1 = wasm_unpackhi_i16x8(a.val, c.val); // a4 c4 a5 c5 ... + v128_t u2 = wasm_unpacklo_i16x8(b.val, d.val); // b0 d0 b1 d1 ... + v128_t u3 = wasm_unpackhi_i16x8(b.val, d.val); // b4 d4 b5 d5 ... + + v128_t v0 = wasm_unpacklo_i16x8(u0, u2); // a0 b0 c0 d0 ... + v128_t v1 = wasm_unpackhi_i16x8(u0, u2); // a2 b2 c2 d2 ... + v128_t v2 = wasm_unpacklo_i16x8(u1, u3); // a4 b4 c4 d4 ... + v128_t v3 = wasm_unpackhi_i16x8(u1, u3); // a6 b6 c6 d6 ... + + wasm_v128_store(ptr, v0); + wasm_v128_store(ptr + 8, v1); + wasm_v128_store(ptr + 16, v2); + wasm_v128_store(ptr + 24, v3); +} + +inline void v_store_interleave( unsigned* ptr, const v_uint32x4& a, const v_uint32x4& b, + hal::StoreMode /*mode*/ = hal::STORE_UNALIGNED) +{ + v128_t v0 = wasm_unpacklo_i32x4(a.val, b.val); + v128_t v1 = wasm_unpackhi_i32x4(a.val, b.val); + + wasm_v128_store(ptr, v0); + wasm_v128_store(ptr + 4, v1); +} + +inline void v_store_interleave( unsigned* ptr, const v_uint32x4& a, const v_uint32x4& b, + const v_uint32x4& c, hal::StoreMode /*mode*/ = hal::STORE_UNALIGNED) +{ + v128_t t00 = wasm_v8x16_shuffle(a.val, b.val, 0,1,2,3,16,17,18,19,0,0,0,0,4,5,6,7); + v128_t t01 = wasm_v8x16_shuffle(a.val, b.val, 20,21,22,23,0,0,0,0,8,9,10,11,24,25,26,27); + v128_t t02 = wasm_v8x16_shuffle(a.val, b.val, 0,0,0,0,12,13,14,15,28,29,30,31,0,0,0,0); + + v128_t t10 = wasm_v8x16_shuffle(t00, c.val, 0,1,2,3,4,5,6,7,16,17,18,19,12,13,14,15); + v128_t t11 = wasm_v8x16_shuffle(t01, c.val, 0,1,2,3,20,21,22,23,8,9,10,11,12,13,14,15); + v128_t t12 = wasm_v8x16_shuffle(t02, c.val, 24,25,26,27,4,5,6,7,8,9,10,11,28,29,30,31); + + wasm_v128_store(ptr, t10); + wasm_v128_store(ptr + 4, t11); + wasm_v128_store(ptr + 8, t12); +} + +inline void v_store_interleave(unsigned* ptr, const v_uint32x4& a, const v_uint32x4& b, + const v_uint32x4& c, const v_uint32x4& d, + hal::StoreMode /*mode*/ = hal::STORE_UNALIGNED) +{ + v_uint32x4 v0, v1, v2, v3; + v_transpose4x4(a, b, c, d, v0, v1, v2, v3); + + wasm_v128_store(ptr, v0.val); + wasm_v128_store(ptr + 4, v1.val); + wasm_v128_store(ptr + 8, v2.val); + wasm_v128_store(ptr + 12, v3.val); +} + +// 2-channel, float only +inline void v_store_interleave(float* ptr, const v_float32x4& a, const v_float32x4& b, + hal::StoreMode /*mode*/ = hal::STORE_UNALIGNED) +{ + v128_t v0 = wasm_unpacklo_i32x4(a.val, b.val); + v128_t v1 = wasm_unpackhi_i32x4(a.val, b.val); + + wasm_v128_store(ptr, v0); + wasm_v128_store(ptr + 4, v1); +} + +inline void v_store_interleave(float* ptr, const v_float32x4& a, const v_float32x4& b, + const v_float32x4& c, hal::StoreMode /*mode*/ = hal::STORE_UNALIGNED) +{ + v128_t t00 = wasm_v8x16_shuffle(a.val, b.val, 0,1,2,3,16,17,18,19,0,0,0,0,4,5,6,7); + v128_t t01 = wasm_v8x16_shuffle(a.val, b.val, 20,21,22,23,0,0,0,0,8,9,10,11,24,25,26,27); + v128_t t02 = wasm_v8x16_shuffle(a.val, b.val, 0,0,0,0,12,13,14,15,28,29,30,31,0,0,0,0); + + v128_t t10 = wasm_v8x16_shuffle(t00, c.val, 0,1,2,3,4,5,6,7,16,17,18,19,12,13,14,15); + v128_t t11 = wasm_v8x16_shuffle(t01, c.val, 0,1,2,3,20,21,22,23,8,9,10,11,12,13,14,15); + v128_t t12 = wasm_v8x16_shuffle(t02, c.val, 24,25,26,27,4,5,6,7,8,9,10,11,28,29,30,31); + + wasm_v128_store(ptr, t10); + wasm_v128_store(ptr + 4, t11); + wasm_v128_store(ptr + 8, t12); +} + +inline void v_store_interleave(float* ptr, const v_float32x4& a, const v_float32x4& b, + const v_float32x4& c, const v_float32x4& d, + hal::StoreMode /*mode*/ = hal::STORE_UNALIGNED) +{ + v_float32x4 v0, v1, v2, v3; + v_transpose4x4(a, b, c, d, v0, v1, v2, v3); + + wasm_v128_store(ptr, v0.val); + wasm_v128_store(ptr + 4, v1.val); + wasm_v128_store(ptr + 8, v2.val); + wasm_v128_store(ptr + 12, v3.val); +} + +inline void v_store_interleave(uint64 *ptr, const v_uint64x2& a, const v_uint64x2& b, + hal::StoreMode /*mode*/ = hal::STORE_UNALIGNED) +{ + v128_t v0 = wasm_unpacklo_i64x2(a.val, b.val); + v128_t v1 = wasm_unpackhi_i64x2(a.val, b.val); + + wasm_v128_store(ptr, v0); + wasm_v128_store(ptr + 2, v1); +} + +inline void v_store_interleave(uint64 *ptr, const v_uint64x2& a, const v_uint64x2& b, + const v_uint64x2& c, hal::StoreMode /*mode*/ = hal::STORE_UNALIGNED) +{ + v128_t v0 = wasm_v8x16_shuffle(a.val, b.val, 0,1,2,3,4,5,6,7,16,17,18,19,20,21,22,23); + v128_t v1 = wasm_v8x16_shuffle(a.val, c.val, 16,17,18,19,20,21,22,23,8,9,10,11,12,13,14,15); + v128_t v2 = wasm_v8x16_shuffle(b.val, c.val, 8,9,10,11,12,13,14,15,24,25,26,27,28,29,30,31); + + wasm_v128_store(ptr, v0); + wasm_v128_store(ptr + 2, v1); + wasm_v128_store(ptr + 4, v2); +} + +inline void v_store_interleave(uint64 *ptr, const v_uint64x2& a, const v_uint64x2& b, + const v_uint64x2& c, const v_uint64x2& d, + hal::StoreMode /*mode*/ = hal::STORE_UNALIGNED) +{ + v128_t v0 = wasm_unpacklo_i64x2(a.val, b.val); + v128_t v1 = wasm_unpacklo_i64x2(c.val, d.val); + v128_t v2 = wasm_unpackhi_i64x2(a.val, b.val); + v128_t v3 = wasm_unpackhi_i64x2(c.val, d.val); + + wasm_v128_store(ptr, v0); + wasm_v128_store(ptr + 2, v1); + wasm_v128_store(ptr + 4, v2); + wasm_v128_store(ptr + 6, v3); +} + +#define OPENCV_HAL_IMPL_WASM_LOADSTORE_INTERLEAVE(_Tpvec0, _Tp0, suffix0, _Tpvec1, _Tp1, suffix1) \ +inline void v_load_deinterleave( const _Tp0* ptr, _Tpvec0& a0, _Tpvec0& b0 ) \ +{ \ + _Tpvec1 a1, b1; \ + v_load_deinterleave((const _Tp1*)ptr, a1, b1); \ + a0 = v_reinterpret_as_##suffix0(a1); \ + b0 = v_reinterpret_as_##suffix0(b1); \ +} \ +inline void v_load_deinterleave( const _Tp0* ptr, _Tpvec0& a0, _Tpvec0& b0, _Tpvec0& c0 ) \ +{ \ + _Tpvec1 a1, b1, c1; \ + v_load_deinterleave((const _Tp1*)ptr, a1, b1, c1); \ + a0 = v_reinterpret_as_##suffix0(a1); \ + b0 = v_reinterpret_as_##suffix0(b1); \ + c0 = v_reinterpret_as_##suffix0(c1); \ +} \ +inline void v_load_deinterleave( const _Tp0* ptr, _Tpvec0& a0, _Tpvec0& b0, _Tpvec0& c0, _Tpvec0& d0 ) \ +{ \ + _Tpvec1 a1, b1, c1, d1; \ + v_load_deinterleave((const _Tp1*)ptr, a1, b1, c1, d1); \ + a0 = v_reinterpret_as_##suffix0(a1); \ + b0 = v_reinterpret_as_##suffix0(b1); \ + c0 = v_reinterpret_as_##suffix0(c1); \ + d0 = v_reinterpret_as_##suffix0(d1); \ +} \ +inline void v_store_interleave( _Tp0* ptr, const _Tpvec0& a0, const _Tpvec0& b0, \ + hal::StoreMode mode = hal::STORE_UNALIGNED ) \ +{ \ + _Tpvec1 a1 = v_reinterpret_as_##suffix1(a0); \ + _Tpvec1 b1 = v_reinterpret_as_##suffix1(b0); \ + v_store_interleave((_Tp1*)ptr, a1, b1, mode); \ +} \ +inline void v_store_interleave( _Tp0* ptr, const _Tpvec0& a0, const _Tpvec0& b0, \ + const _Tpvec0& c0, hal::StoreMode mode = hal::STORE_UNALIGNED ) \ +{ \ + _Tpvec1 a1 = v_reinterpret_as_##suffix1(a0); \ + _Tpvec1 b1 = v_reinterpret_as_##suffix1(b0); \ + _Tpvec1 c1 = v_reinterpret_as_##suffix1(c0); \ + v_store_interleave((_Tp1*)ptr, a1, b1, c1, mode); \ +} \ +inline void v_store_interleave( _Tp0* ptr, const _Tpvec0& a0, const _Tpvec0& b0, \ + const _Tpvec0& c0, const _Tpvec0& d0, \ + hal::StoreMode mode = hal::STORE_UNALIGNED ) \ +{ \ + _Tpvec1 a1 = v_reinterpret_as_##suffix1(a0); \ + _Tpvec1 b1 = v_reinterpret_as_##suffix1(b0); \ + _Tpvec1 c1 = v_reinterpret_as_##suffix1(c0); \ + _Tpvec1 d1 = v_reinterpret_as_##suffix1(d0); \ + v_store_interleave((_Tp1*)ptr, a1, b1, c1, d1, mode); \ +} + +OPENCV_HAL_IMPL_WASM_LOADSTORE_INTERLEAVE(v_int8x16, schar, s8, v_uint8x16, uchar, u8) +OPENCV_HAL_IMPL_WASM_LOADSTORE_INTERLEAVE(v_int16x8, short, s16, v_uint16x8, ushort, u16) +OPENCV_HAL_IMPL_WASM_LOADSTORE_INTERLEAVE(v_int32x4, int, s32, v_uint32x4, unsigned, u32) +OPENCV_HAL_IMPL_WASM_LOADSTORE_INTERLEAVE(v_int64x2, int64, s64, v_uint64x2, uint64, u64) +OPENCV_HAL_IMPL_WASM_LOADSTORE_INTERLEAVE(v_float64x2, double, f64, v_uint64x2, uint64, u64) + +inline v_float32x4 v_cvt_f32(const v_int32x4& a) +{ + return v_float32x4(wasm_f32x4_convert_i32x4(a.val)); +} + +inline v_float32x4 v_cvt_f32(const v_float64x2& a) +{ + fallback::v_float64x2 a_(a); + return fallback::v_cvt_f32(a_); +} + +inline v_float32x4 v_cvt_f32(const v_float64x2& a, const v_float64x2& b) +{ + fallback::v_float64x2 a_(a), b_(b); + return fallback::v_cvt_f32(a_, b_); +} + +inline v_float64x2 v_cvt_f64(const v_int32x4& a) +{ +#ifdef __wasm_unimplemented_simd128__ + v128_t p = v128_cvti32x4_i64x2(a.val); + return v_float64x2(wasm_f64x2_convert_i64x2(p)); +#else + fallback::v_int32x4 a_(a); + return fallback::v_cvt_f64(a_); +#endif +} + +inline v_float64x2 v_cvt_f64_high(const v_int32x4& a) +{ +#ifdef __wasm_unimplemented_simd128__ + v128_t p = v128_cvti32x4_i64x2_high(a.val); + return v_float64x2(wasm_f64x2_convert_i64x2(p)); +#else + fallback::v_int32x4 a_(a); + return fallback::v_cvt_f64_high(a_); +#endif +} + +inline v_float64x2 v_cvt_f64(const v_float32x4& a) +{ + fallback::v_float32x4 a_(a); + return fallback::v_cvt_f64(a_); +} + +inline v_float64x2 v_cvt_f64_high(const v_float32x4& a) +{ + fallback::v_float32x4 a_(a); + return fallback::v_cvt_f64_high(a_); +} + +inline v_float64x2 v_cvt_f64(const v_int64x2& a) +{ +#ifdef __wasm_unimplemented_simd128__ + return v_float64x2(wasm_f64x2_convert_i64x2(a.val)); +#else + fallback::v_int64x2 a_(a); + return fallback::v_cvt_f64(a_); +#endif +} + +////////////// Lookup table access //////////////////// + +inline v_int8x16 v_lut(const schar* tab, const int* idx) +{ + return v_int8x16(tab[idx[0]], tab[idx[1]], tab[idx[ 2]], tab[idx[ 3]], tab[idx[ 4]], tab[idx[ 5]], tab[idx[ 6]], tab[idx[ 7]], + tab[idx[8]], tab[idx[9]], tab[idx[10]], tab[idx[11]], tab[idx[12]], tab[idx[13]], tab[idx[14]], tab[idx[15]]); +} +inline v_int8x16 v_lut_pairs(const schar* tab, const int* idx) +{ + return v_int8x16(tab[idx[0]], tab[idx[0]+1], tab[idx[1]], tab[idx[1]+1], tab[idx[2]], tab[idx[2]+1], tab[idx[3]], tab[idx[3]+1], + tab[idx[4]], tab[idx[4]+1], tab[idx[5]], tab[idx[5]+1], tab[idx[6]], tab[idx[6]+1], tab[idx[7]], tab[idx[7]+1]); +} +inline v_int8x16 v_lut_quads(const schar* tab, const int* idx) +{ + return v_int8x16(tab[idx[0]], tab[idx[0]+1], tab[idx[0]+2], tab[idx[0]+3], tab[idx[1]], tab[idx[1]+1], tab[idx[1]+2], tab[idx[1]+3], + tab[idx[2]], tab[idx[2]+1], tab[idx[2]+2], tab[idx[2]+3], tab[idx[3]], tab[idx[3]+1], tab[idx[3]+2], tab[idx[3]+3]); +} +inline v_uint8x16 v_lut(const uchar* tab, const int* idx) { return v_reinterpret_as_u8(v_lut((const schar *)tab, idx)); } +inline v_uint8x16 v_lut_pairs(const uchar* tab, const int* idx) { return v_reinterpret_as_u8(v_lut_pairs((const schar *)tab, idx)); } +inline v_uint8x16 v_lut_quads(const uchar* tab, const int* idx) { return v_reinterpret_as_u8(v_lut_quads((const schar *)tab, idx)); } + +inline v_int16x8 v_lut(const short* tab, const int* idx) +{ + return v_int16x8(tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]], + tab[idx[4]], tab[idx[5]], tab[idx[6]], tab[idx[7]]); +} +inline v_int16x8 v_lut_pairs(const short* tab, const int* idx) +{ + return v_int16x8(tab[idx[0]], tab[idx[0]+1], tab[idx[1]], tab[idx[1]+1], + tab[idx[2]], tab[idx[2]+1], tab[idx[3]], tab[idx[3]+1]); +} +inline v_int16x8 v_lut_quads(const short* tab, const int* idx) +{ + return v_int16x8(tab[idx[0]], tab[idx[0]+1], tab[idx[0]+2], tab[idx[0]+3], + tab[idx[1]], tab[idx[1]+1], tab[idx[1]+2], tab[idx[1]+3]); +} +inline v_uint16x8 v_lut(const ushort* tab, const int* idx) { return v_reinterpret_as_u16(v_lut((const short *)tab, idx)); } +inline v_uint16x8 v_lut_pairs(const ushort* tab, const int* idx) { return v_reinterpret_as_u16(v_lut_pairs((const short *)tab, idx)); } +inline v_uint16x8 v_lut_quads(const ushort* tab, const int* idx) { return v_reinterpret_as_u16(v_lut_quads((const short *)tab, idx)); } + +inline v_int32x4 v_lut(const int* tab, const int* idx) +{ + return v_int32x4(tab[idx[0]], tab[idx[1]], + tab[idx[2]], tab[idx[3]]); +} +inline v_int32x4 v_lut_pairs(const int* tab, const int* idx) +{ + return v_int32x4(tab[idx[0]], tab[idx[0]+1], + tab[idx[1]], tab[idx[1]+1]); +} +inline v_int32x4 v_lut_quads(const int* tab, const int* idx) +{ + return v_int32x4(wasm_v128_load(tab + idx[0])); +} +inline v_uint32x4 v_lut(const unsigned* tab, const int* idx) { return v_reinterpret_as_u32(v_lut((const int *)tab, idx)); } +inline v_uint32x4 v_lut_pairs(const unsigned* tab, const int* idx) { return v_reinterpret_as_u32(v_lut_pairs((const int *)tab, idx)); } +inline v_uint32x4 v_lut_quads(const unsigned* tab, const int* idx) { return v_reinterpret_as_u32(v_lut_quads((const int *)tab, idx)); } + +inline v_int64x2 v_lut(const int64_t* tab, const int* idx) +{ + return v_int64x2(tab[idx[0]], tab[idx[1]]); +} +inline v_int64x2 v_lut_pairs(const int64_t* tab, const int* idx) +{ + return v_int64x2(wasm_v128_load(tab + idx[0])); +} +inline v_uint64x2 v_lut(const uint64_t* tab, const int* idx) { return v_reinterpret_as_u64(v_lut((const int64_t *)tab, idx)); } +inline v_uint64x2 v_lut_pairs(const uint64_t* tab, const int* idx) { return v_reinterpret_as_u64(v_lut_pairs((const int64_t *)tab, idx)); } + +inline v_float32x4 v_lut(const float* tab, const int* idx) +{ + return v_float32x4(tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]]); +} +inline v_float32x4 v_lut_pairs(const float* tab, const int* idx) { return v_reinterpret_as_f32(v_lut_pairs((const int *)tab, idx)); } +inline v_float32x4 v_lut_quads(const float* tab, const int* idx) { return v_reinterpret_as_f32(v_lut_quads((const int *)tab, idx)); } + +inline v_float64x2 v_lut(const double* tab, const int* idx) +{ + return v_float64x2(tab[idx[0]], tab[idx[1]]); +} +inline v_float64x2 v_lut_pairs(const double* tab, const int* idx) +{ + return v_float64x2(wasm_v128_load(tab + idx[0])); +} + +inline v_int32x4 v_lut(const int* tab, const v_int32x4& idxvec) +{ + return v_int32x4(tab[wasm_i32x4_extract_lane(idxvec.val, 0)], + tab[wasm_i32x4_extract_lane(idxvec.val, 1)], + tab[wasm_i32x4_extract_lane(idxvec.val, 2)], + tab[wasm_i32x4_extract_lane(idxvec.val, 3)]); +} + +inline v_uint32x4 v_lut(const unsigned* tab, const v_int32x4& idxvec) +{ + return v_reinterpret_as_u32(v_lut((const int *)tab, idxvec)); +} + +inline v_float32x4 v_lut(const float* tab, const v_int32x4& idxvec) +{ + return v_float32x4(tab[wasm_i32x4_extract_lane(idxvec.val, 0)], + tab[wasm_i32x4_extract_lane(idxvec.val, 1)], + tab[wasm_i32x4_extract_lane(idxvec.val, 2)], + tab[wasm_i32x4_extract_lane(idxvec.val, 3)]); +} + +inline v_float64x2 v_lut(const double* tab, const v_int32x4& idxvec) +{ + return v_float64x2(tab[wasm_i32x4_extract_lane(idxvec.val, 0)], + tab[wasm_i32x4_extract_lane(idxvec.val, 1)]); +} + +// loads pairs from the table and deinterleaves them, e.g. returns: +// x = (tab[idxvec[0], tab[idxvec[1]], tab[idxvec[2]], tab[idxvec[3]]), +// y = (tab[idxvec[0]+1], tab[idxvec[1]+1], tab[idxvec[2]+1], tab[idxvec[3]+1]) +// note that the indices are float's indices, not the float-pair indices. +// in theory, this function can be used to implement bilinear interpolation, +// when idxvec are the offsets within the image. +inline void v_lut_deinterleave(const float* tab, const v_int32x4& idxvec, v_float32x4& x, v_float32x4& y) +{ + x = v_float32x4(tab[wasm_i32x4_extract_lane(idxvec.val, 0)], + tab[wasm_i32x4_extract_lane(idxvec.val, 1)], + tab[wasm_i32x4_extract_lane(idxvec.val, 2)], + tab[wasm_i32x4_extract_lane(idxvec.val, 3)]); + y = v_float32x4(tab[wasm_i32x4_extract_lane(idxvec.val, 0)+1], + tab[wasm_i32x4_extract_lane(idxvec.val, 1)+1], + tab[wasm_i32x4_extract_lane(idxvec.val, 2)+1], + tab[wasm_i32x4_extract_lane(idxvec.val, 3)+1]); +} + +inline void v_lut_deinterleave(const double* tab, const v_int32x4& idxvec, v_float64x2& x, v_float64x2& y) +{ + v128_t xy0 = wasm_v128_load(tab + wasm_i32x4_extract_lane(idxvec.val, 0)); + v128_t xy1 = wasm_v128_load(tab + wasm_i32x4_extract_lane(idxvec.val, 1)); + x.val = wasm_unpacklo_i64x2(xy0, xy1); + y.val = wasm_unpacklo_i64x2(xy0, xy1); +} + +inline v_int8x16 v_interleave_pairs(const v_int8x16& vec) +{ + return v_int8x16(wasm_v8x16_shuffle(vec.val, vec.val, 0,2,1,3,4,6,5,7,8,10,9,11,12,14,13,15)); +} +inline v_uint8x16 v_interleave_pairs(const v_uint8x16& vec) { return v_reinterpret_as_u8(v_interleave_pairs(v_reinterpret_as_s8(vec))); } +inline v_int8x16 v_interleave_quads(const v_int8x16& vec) +{ + return v_int8x16(wasm_v8x16_shuffle(vec.val, vec.val, 0,4,1,5,2,6,3,7,8,12,9,13,10,14,11,15)); +} +inline v_uint8x16 v_interleave_quads(const v_uint8x16& vec) { return v_reinterpret_as_u8(v_interleave_quads(v_reinterpret_as_s8(vec))); } + +inline v_int16x8 v_interleave_pairs(const v_int16x8& vec) +{ + return v_int16x8(wasm_v8x16_shuffle(vec.val, vec.val, 0,1,4,5,2,3,6,7,8,9,12,13,10,11,14,15)); +} +inline v_uint16x8 v_interleave_pairs(const v_uint16x8& vec) { return v_reinterpret_as_u16(v_interleave_pairs(v_reinterpret_as_s16(vec))); } +inline v_int16x8 v_interleave_quads(const v_int16x8& vec) +{ + return v_int16x8(wasm_v8x16_shuffle(vec.val, vec.val, 0,1,8,9,2,3,10,11,4,5,12,13,6,7,14,15)); +} +inline v_uint16x8 v_interleave_quads(const v_uint16x8& vec) { return v_reinterpret_as_u16(v_interleave_quads(v_reinterpret_as_s16(vec))); } + +inline v_int32x4 v_interleave_pairs(const v_int32x4& vec) +{ + return v_int32x4(wasm_v8x16_shuffle(vec.val, vec.val, 0,1,2,3,8,9,10,11,4,5,6,7,12,13,14,15)); +} +inline v_uint32x4 v_interleave_pairs(const v_uint32x4& vec) { return v_reinterpret_as_u32(v_interleave_pairs(v_reinterpret_as_s32(vec))); } +inline v_float32x4 v_interleave_pairs(const v_float32x4& vec) +{ + return v_float32x4(wasm_v8x16_shuffle(vec.val, vec.val, 0,1,2,3,8,9,10,11,4,5,6,7,12,13,14,15)); +} + +inline v_int8x16 v_pack_triplets(const v_int8x16& vec) +{ + return v_int8x16(wasm_v8x16_shuffle(vec.val, vec.val, 0,1,2,4,5,6,8,9,10,12,13,14,16,16,16,16)); +} +inline v_uint8x16 v_pack_triplets(const v_uint8x16& vec) { return v_reinterpret_as_u8(v_pack_triplets(v_reinterpret_as_s8(vec))); } + +inline v_int16x8 v_pack_triplets(const v_int16x8& vec) +{ + return v_int16x8(wasm_v8x16_shuffle(vec.val, vec.val, 0,1,2,3,4,5,8,9,10,11,12,13,14,15,6,7)); +} +inline v_uint16x8 v_pack_triplets(const v_uint16x8& vec) { return v_reinterpret_as_u16(v_pack_triplets(v_reinterpret_as_s16(vec))); } + +inline v_int32x4 v_pack_triplets(const v_int32x4& vec) { return vec; } +inline v_uint32x4 v_pack_triplets(const v_uint32x4& vec) { return vec; } +inline v_float32x4 v_pack_triplets(const v_float32x4& vec) { return vec; } + +template +inline typename _Tp::lane_type v_extract_n(const _Tp& a) +{ + return v_rotate_right(a).get0(); +} + +template +inline v_uint32x4 v_broadcast_element(const v_uint32x4& a) +{ + return v_setall_u32(v_extract_n(a)); +} +template +inline v_int32x4 v_broadcast_element(const v_int32x4& a) +{ + return v_setall_s32(v_extract_n(a)); +} +template +inline v_float32x4 v_broadcast_element(const v_float32x4& a) +{ + return v_setall_f32(v_extract_n(a)); +} + + +////////////// FP16 support /////////////////////////// + +inline v_float32x4 v_load_expand(const float16_t* ptr) +{ + return fallback::v_load_expand(ptr); +} + +inline void v_pack_store(float16_t* ptr, const v_float32x4& v) +{ + fallback::v_float32x4 v_(v); + fallback::v_pack_store(ptr, v_); +} + +inline void v_cleanup() {} + +CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END + +//! @endcond + +} + +#endif diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/hal/msa_macros.h b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/hal/msa_macros.h new file mode 100644 index 0000000..bd6ddb1 --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/hal/msa_macros.h @@ -0,0 +1,1558 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#ifndef OPENCV_CORE_HAL_MSA_MACROS_H +#define OPENCV_CORE_HAL_MSA_MACROS_H + +#ifdef __mips_msa +#include "msa.h" +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/* Define 64 bits vector types */ +typedef signed char v8i8 __attribute__ ((vector_size(8), aligned(8))); +typedef unsigned char v8u8 __attribute__ ((vector_size(8), aligned(8))); +typedef short v4i16 __attribute__ ((vector_size(8), aligned(8))); +typedef unsigned short v4u16 __attribute__ ((vector_size(8), aligned(8))); +typedef int v2i32 __attribute__ ((vector_size(8), aligned(8))); +typedef unsigned int v2u32 __attribute__ ((vector_size(8), aligned(8))); +typedef long long v1i64 __attribute__ ((vector_size(8), aligned(8))); +typedef unsigned long long v1u64 __attribute__ ((vector_size(8), aligned(8))); +typedef float v2f32 __attribute__ ((vector_size(8), aligned(8))); +typedef double v1f64 __attribute__ ((vector_size(8), aligned(8))); + + +/* Load values from the given memory a 64-bit vector. */ +#define msa_ld1_s8(__a) (*((v8i8*)(__a))) +#define msa_ld1_s16(__a) (*((v4i16*)(__a))) +#define msa_ld1_s32(__a) (*((v2i32*)(__a))) +#define msa_ld1_s64(__a) (*((v1i64*)(__a))) +#define msa_ld1_u8(__a) (*((v8u8*)(__a))) +#define msa_ld1_u16(__a) (*((v4u16*)(__a))) +#define msa_ld1_u32(__a) (*((v2u32*)(__a))) +#define msa_ld1_u64(__a) (*((v1u64*)(__a))) +#define msa_ld1_f32(__a) (*((v2f32*)(__a))) +#define msa_ld1_f64(__a) (*((v1f64*)(__a))) + +/* Load values from the given memory address to a 128-bit vector */ +#define msa_ld1q_s8(__a) ((v16i8)__builtin_msa_ld_b(__a, 0)) +#define msa_ld1q_s16(__a) ((v8i16)__builtin_msa_ld_h(__a, 0)) +#define msa_ld1q_s32(__a) ((v4i32)__builtin_msa_ld_w(__a, 0)) +#define msa_ld1q_s64(__a) ((v2i64)__builtin_msa_ld_d(__a, 0)) +#define msa_ld1q_u8(__a) ((v16u8)__builtin_msa_ld_b(__a, 0)) +#define msa_ld1q_u16(__a) ((v8u16)__builtin_msa_ld_h(__a, 0)) +#define msa_ld1q_u32(__a) ((v4u32)__builtin_msa_ld_w(__a, 0)) +#define msa_ld1q_u64(__a) ((v2u64)__builtin_msa_ld_d(__a, 0)) +#define msa_ld1q_f32(__a) ((v4f32)__builtin_msa_ld_w(__a, 0)) +#define msa_ld1q_f64(__a) ((v2f64)__builtin_msa_ld_d(__a, 0)) + +/* Store 64bits vector elements values to the given memory address. */ +#define msa_st1_s8(__a, __b) (*((v8i8*)(__a)) = __b) +#define msa_st1_s16(__a, __b) (*((v4i16*)(__a)) = __b) +#define msa_st1_s32(__a, __b) (*((v2i32*)(__a)) = __b) +#define msa_st1_s64(__a, __b) (*((v1i64*)(__a)) = __b) +#define msa_st1_u8(__a, __b) (*((v8u8*)(__a)) = __b) +#define msa_st1_u16(__a, __b) (*((v4u16*)(__a)) = __b) +#define msa_st1_u32(__a, __b) (*((v2u32*)(__a)) = __b) +#define msa_st1_u64(__a, __b) (*((v1u64*)(__a)) = __b) +#define msa_st1_f32(__a, __b) (*((v2f32*)(__a)) = __b) +#define msa_st1_f64(__a, __b) (*((v1f64*)(__a)) = __b) + +/* Store the values of elements in the 128 bits vector __a to the given memory address __a. */ +#define msa_st1q_s8(__a, __b) (__builtin_msa_st_b((v16i8)(__b), __a, 0)) +#define msa_st1q_s16(__a, __b) (__builtin_msa_st_h((v8i16)(__b), __a, 0)) +#define msa_st1q_s32(__a, __b) (__builtin_msa_st_w((v4i32)(__b), __a, 0)) +#define msa_st1q_s64(__a, __b) (__builtin_msa_st_d((v2i64)(__b), __a, 0)) +#define msa_st1q_u8(__a, __b) (__builtin_msa_st_b((v16i8)(__b), __a, 0)) +#define msa_st1q_u16(__a, __b) (__builtin_msa_st_h((v8i16)(__b), __a, 0)) +#define msa_st1q_u32(__a, __b) (__builtin_msa_st_w((v4i32)(__b), __a, 0)) +#define msa_st1q_u64(__a, __b) (__builtin_msa_st_d((v2i64)(__b), __a, 0)) +#define msa_st1q_f32(__a, __b) (__builtin_msa_st_w((v4i32)(__b), __a, 0)) +#define msa_st1q_f64(__a, __b) (__builtin_msa_st_d((v2i64)(__b), __a, 0)) + +/* Store the value of the element with the index __c in vector __a to the given memory address __a. */ +#define msa_st1_lane_s8(__a, __b, __c) (*((int8_t*)(__a)) = __b[__c]) +#define msa_st1_lane_s16(__a, __b, __c) (*((int16_t*)(__a)) = __b[__c]) +#define msa_st1_lane_s32(__a, __b, __c) (*((int32_t*)(__a)) = __b[__c]) +#define msa_st1_lane_s64(__a, __b, __c) (*((int64_t*)(__a)) = __b[__c]) +#define msa_st1_lane_u8(__a, __b, __c) (*((uint8_t*)(__a)) = __b[__c]) +#define msa_st1_lane_u16(__a, __b, __c) (*((uint16_t*)(__a)) = __b[__c]) +#define msa_st1_lane_u32(__a, __b, __c) (*((uint32_t*)(__a)) = __b[__c]) +#define msa_st1_lane_u64(__a, __b, __c) (*((uint64_t*)(__a)) = __b[__c]) +#define msa_st1_lane_f32(__a, __b, __c) (*((float*)(__a)) = __b[__c]) +#define msa_st1_lane_f64(__a, __b, __c) (*((double*)(__a)) = __b[__c]) +#define msa_st1q_lane_s8(__a, __b, __c) (*((int8_t*)(__a)) = (int8_t)__builtin_msa_copy_s_b(__b, __c)) +#define msa_st1q_lane_s16(__a, __b, __c) (*((int16_t*)(__a)) = (int16_t)__builtin_msa_copy_s_h(__b, __c)) +#define msa_st1q_lane_s32(__a, __b, __c) (*((int32_t*)(__a)) = __builtin_msa_copy_s_w(__b, __c)) +#define msa_st1q_lane_s64(__a, __b, __c) (*((int64_t*)(__a)) = __builtin_msa_copy_s_d(__b, __c)) +#define msa_st1q_lane_u8(__a, __b, __c) (*((uint8_t*)(__a)) = (uint8_t)__builtin_msa_copy_u_b((v16i8)(__b), __c)) +#define msa_st1q_lane_u16(__a, __b, __c) (*((uint16_t*)(__a)) = (uint16_t)__builtin_msa_copy_u_h((v8i16)(__b), __c)) +#define msa_st1q_lane_u32(__a, __b, __c) (*((uint32_t*)(__a)) = __builtin_msa_copy_u_w((v4i32)(__b), __c)) +#define msa_st1q_lane_u64(__a, __b, __c) (*((uint64_t*)(__a)) = __builtin_msa_copy_u_d((v2i64)(__b), __c)) +#define msa_st1q_lane_f32(__a, __b, __c) (*((float*)(__a)) = __b[__c]) +#define msa_st1q_lane_f64(__a, __b, __c) (*((double*)(__a)) = __b[__c]) + +/* Duplicate elements for 64-bit doubleword vectors */ +#define msa_dup_n_s8(__a) ((v8i8)__builtin_msa_copy_s_d((v2i64)__builtin_msa_fill_b((int32_t)(__a)), 0)) +#define msa_dup_n_s16(__a) ((v4i16)__builtin_msa_copy_s_d((v2i64)__builtin_msa_fill_h((int32_t)(__a)), 0)) +#define msa_dup_n_s32(__a) ((v2i32){__a, __a}) +#define msa_dup_n_s64(__a) ((v1i64){__a}) +#define msa_dup_n_u8(__a) ((v8u8)__builtin_msa_copy_u_d((v2i64)__builtin_msa_fill_b((int32_t)(__a)), 0)) +#define msa_dup_n_u16(__a) ((v4u16)__builtin_msa_copy_u_d((v2i64)__builtin_msa_fill_h((int32_t)(__a)), 0)) +#define msa_dup_n_u32(__a) ((v2u32){__a, __a}) +#define msa_dup_n_u64(__a) ((v1u64){__a}) +#define msa_dup_n_f32(__a) ((v2f32){__a, __a}) +#define msa_dup_n_f64(__a) ((v1f64){__a}) + +/* Duplicate elements for 128-bit quadword vectors */ +#define msa_dupq_n_s8(__a) (__builtin_msa_fill_b((int32_t)(__a))) +#define msa_dupq_n_s16(__a) (__builtin_msa_fill_h((int32_t)(__a))) +#define msa_dupq_n_s32(__a) (__builtin_msa_fill_w((int32_t)(__a))) +#define msa_dupq_n_s64(__a) (__builtin_msa_fill_d((int64_t)(__a))) +#define msa_dupq_n_u8(__a) ((v16u8)__builtin_msa_fill_b((int32_t)(__a))) +#define msa_dupq_n_u16(__a) ((v8u16)__builtin_msa_fill_h((int32_t)(__a))) +#define msa_dupq_n_u32(__a) ((v4u32)__builtin_msa_fill_w((int32_t)(__a))) +#define msa_dupq_n_u64(__a) ((v2u64)__builtin_msa_fill_d((int64_t)(__a))) +#define msa_dupq_n_f32(__a) ((v4f32){__a, __a, __a, __a}) +#define msa_dupq_n_f64(__a) ((v2f64){__a, __a}) +#define msa_dupq_lane_s8(__a, __b) (__builtin_msa_splat_b(__a, __b)) +#define msa_dupq_lane_s16(__a, __b) (__builtin_msa_splat_h(__a, __b)) +#define msa_dupq_lane_s32(__a, __b) (__builtin_msa_splat_w(__a, __b)) +#define msa_dupq_lane_s64(__a, __b) (__builtin_msa_splat_d(__a, __b)) +#define msa_dupq_lane_u8(__a, __b) ((v16u8)__builtin_msa_splat_b((v16i8)(__a), __b)) +#define msa_dupq_lane_u16(__a, __b) ((v8u16)__builtin_msa_splat_h((v8i16)(__a), __b)) +#define msa_dupq_lane_u32(__a, __b) ((v4u32)__builtin_msa_splat_w((v4i32)(__a), __b)) +#define msa_dupq_lane_u64(__a, __b) ((v2u64)__builtin_msa_splat_d((v2i64)(__a), __b)) + +/* Create a 64 bits vector */ +#define msa_create_s8(__a) ((v8i8)((uint64_t)(__a))) +#define msa_create_s16(__a) ((v4i16)((uint64_t)(__a))) +#define msa_create_s32(__a) ((v2i32)((uint64_t)(__a))) +#define msa_create_s64(__a) ((v1i64)((uint64_t)(__a))) +#define msa_create_u8(__a) ((v8u8)((uint64_t)(__a))) +#define msa_create_u16(__a) ((v4u16)((uint64_t)(__a))) +#define msa_create_u32(__a) ((v2u32)((uint64_t)(__a))) +#define msa_create_u64(__a) ((v1u64)((uint64_t)(__a))) +#define msa_create_f32(__a) ((v2f32)((uint64_t)(__a))) +#define msa_create_f64(__a) ((v1f64)((uint64_t)(__a))) + +/* Sign extends or zero extends each element in a 64 bits vector to twice its original length, and places the results in a 128 bits vector. */ +/*Transform v8i8 to v8i16*/ +#define msa_movl_s8(__a) \ +((v8i16){(__a)[0], (__a)[1], (__a)[2], (__a)[3], \ + (__a)[4], (__a)[5], (__a)[6], (__a)[7]}) + +/*Transform v8u8 to v8u16*/ +#define msa_movl_u8(__a) \ +((v8u16){(__a)[0], (__a)[1], (__a)[2], (__a)[3], \ + (__a)[4], (__a)[5], (__a)[6], (__a)[7]}) + +/*Transform v4i16 to v8i16*/ +#define msa_movl_s16(__a) ((v4i32){(__a)[0], (__a)[1], (__a)[2], (__a)[3]}) + +/*Transform v2i32 to v4i32*/ +#define msa_movl_s32(__a) ((v2i64){(__a)[0], (__a)[1]}) + +/*Transform v4u16 to v8u16*/ +#define msa_movl_u16(__a) ((v4u32){(__a)[0], (__a)[1], (__a)[2], (__a)[3]}) + +/*Transform v2u32 to v4u32*/ +#define msa_movl_u32(__a) ((v2u64){(__a)[0], (__a)[1]}) + +/* Copies the least significant half of each element of a 128 bits vector into the corresponding elements of a 64 bits vector. */ +#define msa_movn_s16(__a) \ +({ \ + v16i8 __d = __builtin_msa_pckev_b(__builtin_msa_fill_b(0), (v16i8)(__a)); \ + (v8i8)__builtin_msa_copy_s_d((v2i64)__d, 0); \ +}) + +#define msa_movn_s32(__a) \ +({ \ + v8i16 __d = __builtin_msa_pckev_h(__builtin_msa_fill_h(0), (v8i16)(__a)); \ + (v4i16)__builtin_msa_copy_s_d((v2i64)__d, 0); \ +}) + +#define msa_movn_s64(__a) \ +({ \ + v4i32 __d = __builtin_msa_pckev_w(__builtin_msa_fill_w(0), (v4i32)(__a)); \ + (v2i32)__builtin_msa_copy_s_d((v2i64)__d, 0); \ +}) + +#define msa_movn_u16(__a) \ +({ \ + v16i8 __d = __builtin_msa_pckev_b(__builtin_msa_fill_b(0), (v16i8)(__a)); \ + (v8u8)__builtin_msa_copy_u_d((v2i64)__d, 0); \ +}) + +#define msa_movn_u32(__a) \ +({ \ + v8i16 __d = __builtin_msa_pckev_h(__builtin_msa_fill_h(0), (v8i16)(__a)); \ + (v4u16)__builtin_msa_copy_u_d((v2i64)__d, 0); \ +}) + +#define msa_movn_u64(__a) \ +({ \ + v4i32 __d = __builtin_msa_pckev_w(__builtin_msa_fill_w(0), (v4i32)(__a)); \ + (v2u32)__builtin_msa_copy_u_d((v2i64)__d, 0); \ +}) + +/* qmovn */ +#define msa_qmovn_s16(__a) \ +({ \ + v16i8 __d = __builtin_msa_pckev_b(__builtin_msa_fill_b(0), (v16i8)__builtin_msa_sat_s_h((v8i16)(__a), 7)); \ + (v8i8)__builtin_msa_copy_s_d((v2i64)__d, 0); \ +}) + +#define msa_qmovn_s32(__a) \ +({ \ + v8i16 __d = __builtin_msa_pckev_h(__builtin_msa_fill_h(0), (v8i16)__builtin_msa_sat_s_w((v4i32)(__a), 15)); \ + (v4i16)__builtin_msa_copy_s_d((v2i64)__d, 0); \ +}) + +#define msa_qmovn_s64(__a) \ +({ \ + v4i32 __d = __builtin_msa_pckev_w(__builtin_msa_fill_w(0), (v4i32)__builtin_msa_sat_s_d((v2i64)(__a), 31)); \ + (v2i32)__builtin_msa_copy_s_d((v2i64)__d, 0); \ +}) + +#define msa_qmovn_u16(__a) \ +({ \ + v16i8 __d = __builtin_msa_pckev_b(__builtin_msa_fill_b(0), (v16i8)__builtin_msa_sat_u_h((v8u16)(__a), 7)); \ + (v8u8)__builtin_msa_copy_u_d((v2i64)__d, 0); \ +}) + +#define msa_qmovn_u32(__a) \ +({ \ + v8i16 __d = __builtin_msa_pckev_h(__builtin_msa_fill_h(0), (v8i16)__builtin_msa_sat_u_w((v4u32)(__a), 15)); \ + (v4u16)__builtin_msa_copy_u_d((v2i64)__d, 0); \ +}) + +#define msa_qmovn_u64(__a) \ +({ \ + v4i32 __d = __builtin_msa_pckev_w(__builtin_msa_fill_w(0), (v4i32)__builtin_msa_sat_u_d((v2u64)(__a), 31)); \ + (v2u32)__builtin_msa_copy_u_d((v2i64)__d, 0); \ +}) + +/* qmovun */ +#define msa_qmovun_s16(__a) \ +({ \ + v8i16 __d = __builtin_msa_max_s_h(__builtin_msa_fill_h(0), (v8i16)(__a)); \ + v16i8 __e = __builtin_msa_pckev_b(__builtin_msa_fill_b(0), (v16i8)__builtin_msa_sat_u_h((v8u16)__d, 7)); \ + (v8u8)__builtin_msa_copy_u_d((v2i64)__e, 0); \ +}) + +#define msa_qmovun_s32(__a) \ +({ \ + v4i32 __d = __builtin_msa_max_s_w(__builtin_msa_fill_w(0), (v4i32)(__a)); \ + v8i16 __e = __builtin_msa_pckev_h(__builtin_msa_fill_h(0), (v8i16)__builtin_msa_sat_u_w((v4u32)__d, 15)); \ + (v4u16)__builtin_msa_copy_u_d((v2i64)__e, 0); \ +}) + +#define msa_qmovun_s64(__a) \ +({ \ + v2i64 __d = __builtin_msa_max_s_d(__builtin_msa_fill_d(0), (v2i64)(__a)); \ + v4i32 __e = __builtin_msa_pckev_w(__builtin_msa_fill_w(0), (v4i32)__builtin_msa_sat_u_d((v2u64)__d, 31)); \ + (v2u32)__builtin_msa_copy_u_d((v2i64)__e, 0); \ +}) + +/* Right shift elements in a 128 bits vector by an immediate value, and places the results in a 64 bits vector. */ +#define msa_shrn_n_s16(__a, __b) \ +({ \ + v16i8 __d = __builtin_msa_pckev_b(__builtin_msa_fill_b(0), (v16i8)__builtin_msa_srai_h((v8i16)(__a), (int)(__b))); \ + (v8i8)__builtin_msa_copy_s_d((v2i64)__d, 0); \ +}) + +#define msa_shrn_n_s32(__a, __b) \ +({ \ + v8i16 __d = __builtin_msa_pckev_h(__builtin_msa_fill_h(0), (v8i16)__builtin_msa_srai_w((v4i32)(__a), (int)(__b))); \ + (v4i16)__builtin_msa_copy_s_d((v2i64)__d, 0); \ +}) + +#define msa_shrn_n_s64(__a, __b) \ +({ \ + v4i32 __d = __builtin_msa_pckev_w(__builtin_msa_fill_w(0), (v4i32)__builtin_msa_srai_d((v2i64)(__a), (int)(__b))); \ + (v2i32)__builtin_msa_copy_s_d((v2i64)__d, 0); \ +}) + +#define msa_shrn_n_u16(__a, __b) \ +({ \ + v16i8 __d = __builtin_msa_pckev_b(__builtin_msa_fill_b(0), (v16i8)__builtin_msa_srli_h((v8i16)(__a), (int)(__b))); \ + (v8u8)__builtin_msa_copy_u_d((v2i64)__d, 0); \ +}) + +#define msa_shrn_n_u32(__a, __b) \ +({ \ + v8i16 __d = __builtin_msa_pckev_h(__builtin_msa_fill_h(0), (v8i16)__builtin_msa_srli_w((v4i32)(__a), (int)(__b))); \ + (v4u16)__builtin_msa_copy_u_d((v2i64)__d, 0); \ +}) + +#define msa_shrn_n_u64(__a, __b) \ +({ \ + v4i32 __d = __builtin_msa_pckev_w(__builtin_msa_fill_w(0), (v4i32)__builtin_msa_srli_d((v2i64)(__a), (int)(__b))); \ + (v2u32)__builtin_msa_copy_u_d((v2i64)__d, 0); \ +}) + +/* Right shift elements in a 128 bits vector by an immediate value, and places the results in a 64 bits vector. */ +#define msa_rshrn_n_s16(__a, __b) \ +({ \ + v16i8 __d = __builtin_msa_pckev_b(__builtin_msa_fill_b(0), (v16i8)__builtin_msa_srari_h((v8i16)(__a), (int)__b)); \ + (v8i8)__builtin_msa_copy_s_d((v2i64)__d, 0); \ +}) + +#define msa_rshrn_n_s32(__a, __b) \ +({ \ + v8i16 __d = __builtin_msa_pckev_h(__builtin_msa_fill_h(0), (v8i16)__builtin_msa_srari_w((v4i32)(__a), (int)__b)); \ + (v4i16)__builtin_msa_copy_s_d((v2i64)__d, 0); \ +}) + +#define msa_rshrn_n_s64(__a, __b) \ +({ \ + v4i32 __d = __builtin_msa_pckev_w(__builtin_msa_fill_w(0), (v4i32)__builtin_msa_srari_d((v2i64)(__a), (int)__b)); \ + (v2i32)__builtin_msa_copy_s_d((v2i64)__d, 0); \ +}) + +#define msa_rshrn_n_u16(__a, __b) \ +({ \ + v16i8 __d = __builtin_msa_pckev_b(__builtin_msa_fill_b(0), (v16i8)__builtin_msa_srlri_h((v8i16)(__a), (int)__b)); \ + (v8u8)__builtin_msa_copy_u_d((v2i64)__d, 0); \ +}) + +#define msa_rshrn_n_u32(__a, __b) \ +({ \ + v8i16 __d = __builtin_msa_pckev_h(__builtin_msa_fill_h(0), (v8i16)__builtin_msa_srlri_w((v4i32)(__a), (int)__b)); \ + (v4u16)__builtin_msa_copy_u_d((v2i64)__d, 0); \ +}) + +#define msa_rshrn_n_u64(__a, __b) \ +({ \ + v4i32 __d = __builtin_msa_pckev_w(__builtin_msa_fill_w(0), (v4i32)__builtin_msa_srlri_d((v2i64)(__a), (int)__b)); \ + (v2u32)__builtin_msa_copy_u_d((v2i64)__d, 0); \ +}) + +/* Right shift elements in a 128 bits vector by an immediate value, saturate the results and them in a 64 bits vector. */ +#define msa_qrshrn_n_s16(__a, __b) \ +({ \ + v8i16 __d = __builtin_msa_sat_s_h(__builtin_msa_srari_h((v8i16)(__a), (int)(__b)), 7); \ + v16i8 __e = __builtin_msa_pckev_b(__builtin_msa_fill_b(0), (v16i8)__d); \ + (v8i8)__builtin_msa_copy_s_d((v2i64)__e, 0); \ +}) + +#define msa_qrshrn_n_s32(__a, __b) \ +({ \ + v4i32 __d = __builtin_msa_sat_s_w(__builtin_msa_srari_w((v4i32)(__a), (int)(__b)), 15); \ + v8i16 __e = __builtin_msa_pckev_h(__builtin_msa_fill_h(0), (v8i16)__d); \ + (v4i16)__builtin_msa_copy_s_d((v2i64)__e, 0); \ +}) + +#define msa_qrshrn_n_s64(__a, __b) \ +({ \ + v2i64 __d = __builtin_msa_sat_s_d(__builtin_msa_srari_d((v2i64)(__a), (int)(__b)), 31); \ + v4i32 __e = __builtin_msa_pckev_w(__builtin_msa_fill_w(0), (v4i32)__d); \ + (v2i32)__builtin_msa_copy_s_d((v2i64)__e, 0); \ +}) + +#define msa_qrshrn_n_u16(__a, __b) \ +({ \ + v8u16 __d = __builtin_msa_sat_u_h((v8u16)__builtin_msa_srlri_h((v8i16)(__a), (int)(__b)), 7); \ + v16i8 __e = __builtin_msa_pckev_b(__builtin_msa_fill_b(0), (v16i8)__d); \ + (v8u8)__builtin_msa_copy_u_d((v2i64)__e, 0); \ +}) + +#define msa_qrshrn_n_u32(__a, __b) \ +({ \ + v4u32 __d = __builtin_msa_sat_u_w((v4u32)__builtin_msa_srlri_w((v4i32)(__a), (int)(__b)), 15); \ + v8i16 __e = __builtin_msa_pckev_h(__builtin_msa_fill_h(0), (v8i16)__d); \ + (v4u16)__builtin_msa_copy_u_d((v2i64)__e, 0); \ +}) + +#define msa_qrshrn_n_u64(__a, __b) \ +({ \ + v2u64 __d = __builtin_msa_sat_u_d((v2u64)__builtin_msa_srlri_d((v2i64)(__a), (int)(__b)), 31); \ + v4i32 __e = __builtin_msa_pckev_w(__builtin_msa_fill_w(0), (v4i32)__d); \ + (v2u32)__builtin_msa_copy_u_d((v2i64)__e, 0); \ +}) + +/* Right shift elements in a 128 bits vector by an immediate value, saturate the results and them in a 64 bits vector. + Input is signed and output is unsigned. */ +#define msa_qrshrun_n_s16(__a, __b) \ +({ \ + v8i16 __d = __builtin_msa_srlri_h(__builtin_msa_max_s_h(__builtin_msa_fill_h(0), (v8i16)(__a)), (int)(__b)); \ + v16i8 __e = __builtin_msa_pckev_b(__builtin_msa_fill_b(0), (v16i8)__builtin_msa_sat_u_h((v8u16)__d, 7)); \ + (v8u8)__builtin_msa_copy_u_d((v2i64)__e, 0); \ +}) + +#define msa_qrshrun_n_s32(__a, __b) \ +({ \ + v4i32 __d = __builtin_msa_srlri_w(__builtin_msa_max_s_w(__builtin_msa_fill_w(0), (v4i32)(__a)), (int)(__b)); \ + v8i16 __e = __builtin_msa_pckev_h(__builtin_msa_fill_h(0), (v8i16)__builtin_msa_sat_u_w((v4u32)__d, 15)); \ + (v4u16)__builtin_msa_copy_u_d((v2i64)__e, 0); \ +}) + +#define msa_qrshrun_n_s64(__a, __b) \ +({ \ + v2i64 __d = __builtin_msa_srlri_d(__builtin_msa_max_s_d(__builtin_msa_fill_d(0), (v2i64)(__a)), (int)(__b)); \ + v4i32 __e = __builtin_msa_pckev_w(__builtin_msa_fill_w(0), (v4i32)__builtin_msa_sat_u_d((v2u64)__d, 31)); \ + (v2u32)__builtin_msa_copy_u_d((v2i64)__e, 0); \ +}) + +/* pack */ +#define msa_pack_s16(__a, __b) (__builtin_msa_pckev_b((v16i8)(__b), (v16i8)(__a))) +#define msa_pack_s32(__a, __b) (__builtin_msa_pckev_h((v8i16)(__b), (v8i16)(__a))) +#define msa_pack_s64(__a, __b) (__builtin_msa_pckev_w((v4i32)(__b), (v4i32)(__a))) +#define msa_pack_u16(__a, __b) ((v16u8)__builtin_msa_pckev_b((v16i8)(__b), (v16i8)(__a))) +#define msa_pack_u32(__a, __b) ((v8u16)__builtin_msa_pckev_h((v8i16)(__b), (v8i16)(__a))) +#define msa_pack_u64(__a, __b) ((v4u32)__builtin_msa_pckev_w((v4i32)(__b), (v4i32)(__a))) + +/* qpack */ +#define msa_qpack_s16(__a, __b) \ +(__builtin_msa_pckev_b((v16i8)__builtin_msa_sat_s_h((v8i16)(__b), 7), (v16i8)__builtin_msa_sat_s_h((v8i16)(__a), 7))) +#define msa_qpack_s32(__a, __b) \ +(__builtin_msa_pckev_h((v8i16)__builtin_msa_sat_s_w((v4i32)(__b), 15), (v8i16)__builtin_msa_sat_s_w((v4i32)(__a), 15))) +#define msa_qpack_s64(__a, __b) \ +(__builtin_msa_pckev_w((v4i32)__builtin_msa_sat_s_d((v2i64)(__b), 31), (v4i32)__builtin_msa_sat_s_d((v2i64)(__a), 31))) +#define msa_qpack_u16(__a, __b) \ +((v16u8)__builtin_msa_pckev_b((v16i8)__builtin_msa_sat_u_h((v8u16)(__b), 7), (v16i8)__builtin_msa_sat_u_h((v8u16)(__a), 7))) +#define msa_qpack_u32(__a, __b) \ +((v8u16)__builtin_msa_pckev_h((v8i16)__builtin_msa_sat_u_w((v4u32)(__b), 15), (v8i16)__builtin_msa_sat_u_w((v4u32)(__a), 15))) +#define msa_qpack_u64(__a, __b) \ +((v4u32)__builtin_msa_pckev_w((v4i32)__builtin_msa_sat_u_d((v2u64)(__b), 31), (v4i32)__builtin_msa_sat_u_d((v2u64)(__a), 31))) + +/* qpacku */ +#define msa_qpacku_s16(__a, __b) \ +((v16u8)__builtin_msa_pckev_b((v16i8)__builtin_msa_sat_u_h((v8u16)(__builtin_msa_max_s_h(__builtin_msa_fill_h(0), (v8i16)(__b))), 7), \ + (v16i8)__builtin_msa_sat_u_h((v8u16)(__builtin_msa_max_s_h(__builtin_msa_fill_h(0), (v8i16)(__a))), 7))) +#define msa_qpacku_s32(__a, __b) \ +((v8u16)__builtin_msa_pckev_h((v8i16)__builtin_msa_sat_u_w((v4u32)(__builtin_msa_max_s_w(__builtin_msa_fill_w(0), (v4i32)(__b))), 15), \ + (v8i16)__builtin_msa_sat_u_w((v4u32)(__builtin_msa_max_s_w(__builtin_msa_fill_w(0), (v4i32)(__a))), 15))) +#define msa_qpacku_s64(__a, __b) \ +((v4u32)__builtin_msa_pckev_w((v4i32)__builtin_msa_sat_u_d((v2u64)(__builtin_msa_max_s_d(__builtin_msa_fill_d(0), (v2i64)(__b))), 31), \ + (v4i32)__builtin_msa_sat_u_d((v2u64)(__builtin_msa_max_s_d(__builtin_msa_fill_d(0), (v2i64)(__a))), 31))) + +/* packr */ +#define msa_packr_s16(__a, __b, __c) \ +(__builtin_msa_pckev_b((v16i8)__builtin_msa_srai_h((v8i16)(__b), (int)(__c)), (v16i8)__builtin_msa_srai_h((v8i16)(__a), (int)(__c)))) +#define msa_packr_s32(__a, __b, __c) \ +(__builtin_msa_pckev_h((v8i16)__builtin_msa_srai_w((v4i32)(__b), (int)(__c)), (v8i16)__builtin_msa_srai_w((v4i32)(__a), (int)(__c)))) +#define msa_packr_s64(__a, __b, __c) \ +(__builtin_msa_pckev_w((v4i32)__builtin_msa_srai_d((v2i64)(__b), (int)(__c)), (v4i32)__builtin_msa_srai_d((v2i64)(__a), (int)(__c)))) +#define msa_packr_u16(__a, __b, __c) \ +((v16u8)__builtin_msa_pckev_b((v16i8)__builtin_msa_srli_h((v8i16)(__b), (int)(__c)), (v16i8)__builtin_msa_srli_h((v8i16)(__a), (int)(__c)))) +#define msa_packr_u32(__a, __b, __c) \ +((v8u16)__builtin_msa_pckev_h((v8i16)__builtin_msa_srli_w((v4i32)(__b), (int)(__c)), (v8i16)__builtin_msa_srli_w((v4i32)(__a), (int)(__c)))) +#define msa_packr_u64(__a, __b, __c) \ +((v4u32)__builtin_msa_pckev_w((v4i32)__builtin_msa_srli_d((v2i64)(__b), (int)(__c)), (v4i32)__builtin_msa_srli_d((v2i64)(__a), (int)(__c)))) + +/* rpackr */ +#define msa_rpackr_s16(__a, __b, __c) \ +(__builtin_msa_pckev_b((v16i8)__builtin_msa_srari_h((v8i16)(__b), (int)(__c)), (v16i8)__builtin_msa_srari_h((v8i16)(__a), (int)(__c)))) +#define msa_rpackr_s32(__a, __b, __c) \ +(__builtin_msa_pckev_h((v8i16)__builtin_msa_srari_w((v4i32)(__b), (int)(__c)), (v8i16)__builtin_msa_srari_w((v4i32)(__a), (int)(__c)))) +#define msa_rpackr_s64(__a, __b, __c) \ +(__builtin_msa_pckev_w((v4i32)__builtin_msa_srari_d((v2i64)(__b), (int)(__c)), (v4i32)__builtin_msa_srari_d((v2i64)(__a), (int)(__c)))) +#define msa_rpackr_u16(__a, __b, __c) \ +((v16u8)__builtin_msa_pckev_b((v16i8)__builtin_msa_srlri_h((v8i16)(__b), (int)(__c)), (v16i8)__builtin_msa_srlri_h((v8i16)(__a), (int)(__c)))) +#define msa_rpackr_u32(__a, __b, __c) \ +((v8u16)__builtin_msa_pckev_h((v8i16)__builtin_msa_srlri_w((v4i32)(__b), (int)(__c)), (v8i16)__builtin_msa_srlri_w((v4i32)(__a), (int)(__c)))) +#define msa_rpackr_u64(__a, __b, __c) \ +((v4u32)__builtin_msa_pckev_w((v4i32)__builtin_msa_srlri_d((v2i64)(__b), (int)(__c)), (v4i32)__builtin_msa_srlri_d((v2i64)(__a), (int)(__c)))) + +/* qrpackr */ +#define msa_qrpackr_s16(__a, __b, __c) \ +(__builtin_msa_pckev_b((v16i8)__builtin_msa_sat_s_h(__builtin_msa_srari_h((v8i16)(__b), (int)(__c)), 7), \ + (v16i8)__builtin_msa_sat_s_h(__builtin_msa_srari_h((v8i16)(__a), (int)(__c)), 7))) +#define msa_qrpackr_s32(__a, __b, __c) \ +(__builtin_msa_pckev_h((v8i16)__builtin_msa_sat_s_w(__builtin_msa_srari_w((v4i32)(__b), (int)(__c)), 15), \ + (v8i16)__builtin_msa_sat_s_w(__builtin_msa_srari_w((v4i32)(__a), (int)(__c)), 15))) +#define msa_qrpackr_s64(__a, __b, __c) \ +(__builtin_msa_pckev_w((v4i32)__builtin_msa_sat_s_d(__builtin_msa_srari_d((v2i64)(__b), (int)(__c)), 31), \ + (v4i32)__builtin_msa_sat_s_d(__builtin_msa_srari_d((v2i64)(__a), (int)(__c)), 31))) +#define msa_qrpackr_u16(__a, __b, __c) \ +((v16u8)__builtin_msa_pckev_b((v16i8)__builtin_msa_sat_u_h((v8u16)__builtin_msa_srlri_h((v8i16)(__b), (int)(__c)), 7), \ + (v16i8)__builtin_msa_sat_u_h((v8u16)__builtin_msa_srlri_h((v8i16)(__a), (int)(__c)), 7))) +#define msa_qrpackr_u32(__a, __b, __c) \ +((v8u16)__builtin_msa_pckev_h((v8i16)__builtin_msa_sat_u_w((v4u32)__builtin_msa_srlri_w((v4i32)(__b), (int)(__c)), 15), \ + (v8i16)__builtin_msa_sat_u_w((v4u32)__builtin_msa_srlri_w((v4i32)(__a), (int)(__c)), 15))) +#define msa_qrpackr_u64(__a, __b, __c) \ +((v4u32)__builtin_msa_pckev_w((v4i32)__builtin_msa_sat_u_d((v2u64)__builtin_msa_srlri_d((v2i64)(__b), (int)(__c)), 31), \ + (v4i32)__builtin_msa_sat_u_d((v2u64)__builtin_msa_srlri_d((v2i64)(__a), (int)(__c)), 31))) + +/* qrpackru */ +#define msa_qrpackru_s16(__a, __b, __c) \ +({ \ + v8i16 __d = __builtin_msa_srlri_h(__builtin_msa_max_s_h(__builtin_msa_fill_h(0), (v8i16)(__a)), (int)(__c)); \ + v8i16 __e = __builtin_msa_srlri_h(__builtin_msa_max_s_h(__builtin_msa_fill_h(0), (v8i16)(__b)), (int)(__c)); \ + (v16u8)__builtin_msa_pckev_b((v16i8)__builtin_msa_sat_u_h((v8u16)__e, 7), (v16i8)__builtin_msa_sat_u_h((v8u16)__d, 7)); \ +}) + +#define msa_qrpackru_s32(__a, __b, __c) \ +({ \ + v4i32 __d = __builtin_msa_srlri_w(__builtin_msa_max_s_w(__builtin_msa_fill_w(0), (v4i32)(__a)), (int)(__c)); \ + v4i32 __e = __builtin_msa_srlri_w(__builtin_msa_max_s_w(__builtin_msa_fill_w(0), (v4i32)(__b)), (int)(__c)); \ + (v8u16)__builtin_msa_pckev_h((v8i16)__builtin_msa_sat_u_w((v4u32)__e, 15), (v8i16)__builtin_msa_sat_u_w((v4u32)__d, 15)); \ +}) + +#define msa_qrpackru_s64(__a, __b, __c) \ +({ \ + v2i64 __d = __builtin_msa_srlri_d(__builtin_msa_max_s_d(__builtin_msa_fill_d(0), (v2i64)(__a)), (int)(__c)); \ + v2i64 __e = __builtin_msa_srlri_d(__builtin_msa_max_s_d(__builtin_msa_fill_d(0), (v2i64)(__b)), (int)(__c)); \ + (v4u32)__builtin_msa_pckev_w((v4i32)__builtin_msa_sat_u_d((v2u64)__e, 31), (v4i32)__builtin_msa_sat_u_d((v2u64)__d, 31)); \ +}) + +/* Minimum values between corresponding elements in the two vectors are written to the returned vector. */ +#define msa_minq_s8(__a, __b) (__builtin_msa_min_s_b(__a, __b)) +#define msa_minq_s16(__a, __b) (__builtin_msa_min_s_h(__a, __b)) +#define msa_minq_s32(__a, __b) (__builtin_msa_min_s_w(__a, __b)) +#define msa_minq_s64(__a, __b) (__builtin_msa_min_s_d(__a, __b)) +#define msa_minq_u8(__a, __b) ((v16u8)__builtin_msa_min_u_b(__a, __b)) +#define msa_minq_u16(__a, __b) ((v8u16)__builtin_msa_min_u_h(__a, __b)) +#define msa_minq_u32(__a, __b) ((v4u32)__builtin_msa_min_u_w(__a, __b)) +#define msa_minq_u64(__a, __b) ((v2u64)__builtin_msa_min_u_d(__a, __b)) +#define msa_minq_f32(__a, __b) (__builtin_msa_fmin_w(__a, __b)) +#define msa_minq_f64(__a, __b) (__builtin_msa_fmin_d(__a, __b)) + +/* Maximum values between corresponding elements in the two vectors are written to the returned vector. */ +#define msa_maxq_s8(__a, __b) (__builtin_msa_max_s_b(__a, __b)) +#define msa_maxq_s16(__a, __b) (__builtin_msa_max_s_h(__a, __b)) +#define msa_maxq_s32(__a, __b) (__builtin_msa_max_s_w(__a, __b)) +#define msa_maxq_s64(__a, __b) (__builtin_msa_max_s_d(__a, __b)) +#define msa_maxq_u8(__a, __b) ((v16u8)__builtin_msa_max_u_b(__a, __b)) +#define msa_maxq_u16(__a, __b) ((v8u16)__builtin_msa_max_u_h(__a, __b)) +#define msa_maxq_u32(__a, __b) ((v4u32)__builtin_msa_max_u_w(__a, __b)) +#define msa_maxq_u64(__a, __b) ((v2u64)__builtin_msa_max_u_d(__a, __b)) +#define msa_maxq_f32(__a, __b) (__builtin_msa_fmax_w(__a, __b)) +#define msa_maxq_f64(__a, __b) (__builtin_msa_fmax_d(__a, __b)) + +/* Vector type reinterpretion */ +#define MSA_TPV_REINTERPRET(_Tpv, Vec) ((_Tpv)(Vec)) + +/* Add the odd elements in vector __a with the even elements in vector __b to double width elements in the returned vector. */ +/* v8i16 msa_hadd_s16 ((v16i8)__a, (v16i8)__b) */ +#define msa_hadd_s16(__a, __b) (__builtin_msa_hadd_s_h((v16i8)(__a), (v16i8)(__b))) +/* v4i32 msa_hadd_s32 ((v8i16)__a, (v8i16)__b) */ +#define msa_hadd_s32(__a, __b) (__builtin_msa_hadd_s_w((v8i16)(__a), (v8i16)(__b))) +/* v2i64 msa_hadd_s64 ((v4i32)__a, (v4i32)__b) */ +#define msa_hadd_s64(__a, __b) (__builtin_msa_hadd_s_d((v4i32)(__a), (v4i32)(__b))) + +/* Copy even elements in __a to the left half and even elements in __b to the right half and return the result vector. */ +#define msa_pckev_s8(__a, __b) (__builtin_msa_pckev_b((v16i8)(__a), (v16i8)(__b))) +#define msa_pckev_s16(__a, __b) (__builtin_msa_pckev_h((v8i16)(__a), (v8i16)(__b))) +#define msa_pckev_s32(__a, __b) (__builtin_msa_pckev_w((v4i32)(__a), (v4i32)(__b))) +#define msa_pckev_s64(__a, __b) (__builtin_msa_pckev_d((v2i64)(__a), (v2i64)(__b))) + +/* Copy even elements in __a to the left half and even elements in __b to the right half and return the result vector. */ +#define msa_pckod_s8(__a, __b) (__builtin_msa_pckod_b((v16i8)(__a), (v16i8)(__b))) +#define msa_pckod_s16(__a, __b) (__builtin_msa_pckod_h((v8i16)(__a), (v8i16)(__b))) +#define msa_pckod_s32(__a, __b) (__builtin_msa_pckod_w((v4i32)(__a), (v4i32)(__b))) +#define msa_pckod_s64(__a, __b) (__builtin_msa_pckod_d((v2i64)(__a), (v2i64)(__b))) + +#ifdef _MIPSEB +#define LANE_IMM0_1(x) (0b1 - ((x) & 0b1)) +#define LANE_IMM0_3(x) (0b11 - ((x) & 0b11)) +#define LANE_IMM0_7(x) (0b111 - ((x) & 0b111)) +#define LANE_IMM0_15(x) (0b1111 - ((x) & 0b1111)) +#else +#define LANE_IMM0_1(x) ((x) & 0b1) +#define LANE_IMM0_3(x) ((x) & 0b11) +#define LANE_IMM0_7(x) ((x) & 0b111) +#define LANE_IMM0_15(x) ((x) & 0b1111) +#endif + +#define msa_get_lane_u8(__a, __b) ((uint8_t)(__a)[LANE_IMM0_7(__b)]) +#define msa_get_lane_s8(__a, __b) ((int8_t)(__a)[LANE_IMM0_7(__b)]) +#define msa_get_lane_u16(__a, __b) ((uint16_t)(__a)[LANE_IMM0_3(__b)]) +#define msa_get_lane_s16(__a, __b) ((int16_t)(__a)[LANE_IMM0_3(__b)]) +#define msa_get_lane_u32(__a, __b) ((uint32_t)(__a)[LANE_IMM0_1(__b)]) +#define msa_get_lane_s32(__a, __b) ((int32_t)(__a)[LANE_IMM0_1(__b)]) +#define msa_get_lane_f32(__a, __b) ((float)(__a)[LANE_IMM0_3(__b)]) +#define msa_get_lane_s64(__a, __b) ((int64_t)(__a)[LANE_IMM0_1(__b)]) +#define msa_get_lane_u64(__a, __b) ((uint64_t)(__a)[LANE_IMM0_1(__b)]) +#define msa_get_lane_f64(__a, __b) ((double)(__a)[LANE_IMM0_1(__b)]) +#define msa_getq_lane_u8(__a, imm0_15) ((uint8_t)__builtin_msa_copy_u_b((v16i8)(__a), imm0_15)) +#define msa_getq_lane_s8(__a, imm0_15) ((int8_t)__builtin_msa_copy_s_b(__a, imm0_15)) +#define msa_getq_lane_u16(__a, imm0_7) ((uint16_t)__builtin_msa_copy_u_h((v8i16)(__a), imm0_7)) +#define msa_getq_lane_s16(__a, imm0_7) ((int16_t)__builtin_msa_copy_s_h(__a, imm0_7)) +#define msa_getq_lane_u32(__a, imm0_3) __builtin_msa_copy_u_w((v4i32)(__a), imm0_3) +#define msa_getq_lane_s32 __builtin_msa_copy_s_w +#define msa_getq_lane_f32(__a, __b) ((float)(__a)[LANE_IMM0_3(__b)]) +#define msa_getq_lane_f64(__a, __b) ((double)(__a)[LANE_IMM0_1(__b)]) +#if (__mips == 64) +#define msa_getq_lane_u64(__a, imm0_1) __builtin_msa_copy_u_d((v2i64)(__a), imm0_1) +#define msa_getq_lane_s64 __builtin_msa_copy_s_d +#else +#define msa_getq_lane_u64(__a, imm0_1) ((uint64_t)(__a)[LANE_IMM0_1(imm0_1)]) +#define msa_getq_lane_s64(__a, imm0_1) ((int64_t)(__a)[LANE_IMM0_1(imm0_1)]) +#endif + +/* combine */ +#if (__mips == 64) +#define __COMBINE_64_64(__TYPE, a, b) ((__TYPE)((v2u64){((v1u64)(a))[0], ((v1u64)(b))[0]})) +#else +#define __COMBINE_64_64(__TYPE, a, b) ((__TYPE)((v4u32){((v2u32)(a))[0], ((v2u32)(a))[1], \ + ((v2u32)(b))[0], ((v2u32)(b))[1]})) +#endif + +/* v16i8 msa_combine_s8 (v8i8 __a, v8i8 __b) */ +#define msa_combine_s8(__a, __b) __COMBINE_64_64(v16i8, __a, __b) + +/* v8i16 msa_combine_s16(v4i16 __a, v4i16 __b) */ +#define msa_combine_s16(__a, __b) __COMBINE_64_64(v8i16, __a, __b) + +/* v4i32 msa_combine_s32(v2i32 __a, v2i32 __b) */ +#define msa_combine_s32(__a, __b) __COMBINE_64_64(v4i32, __a, __b) + +/* v2i64 msa_combine_s64(v1i64 __a, v1i64 __b) */ +#define msa_combine_s64(__a, __b) __COMBINE_64_64(v2i64, __a, __b) + +/* v4f32 msa_combine_f32(v2f32 __a, v2f32 __b) */ +#define msa_combine_f32(__a, __b) __COMBINE_64_64(v4f32, __a, __b) + +/* v16u8 msa_combine_u8(v8u8 __a, v8u8 __b) */ +#define msa_combine_u8(__a, __b) __COMBINE_64_64(v16u8, __a, __b) + +/* v8u16 msa_combine_u16(v4u16 __a, v4u16 __b) */ +#define msa_combine_u16(__a, __b) __COMBINE_64_64(v8u16, __a, __b) + +/* v4u32 msa_combine_u32(v2u32 __a, v2u32 __b) */ +#define msa_combine_u32(__a, __b) __COMBINE_64_64(v4u32, __a, __b) + +/* v2u64 msa_combine_u64(v1u64 __a, v1u64 __b) */ +#define msa_combine_u64(__a, __b) __COMBINE_64_64(v2u64, __a, __b) + +/* v2f64 msa_combine_f64(v1f64 __a, v1f64 __b) */ +#define msa_combine_f64(__a, __b) __COMBINE_64_64(v2f64, __a, __b) + +/* get_low, get_high */ +#if (__mips == 64) +#define __GET_LOW(__TYPE, a) ((__TYPE)((v1u64)(__builtin_msa_copy_u_d((v2i64)(a), 0)))) +#define __GET_HIGH(__TYPE, a) ((__TYPE)((v1u64)(__builtin_msa_copy_u_d((v2i64)(a), 1)))) +#else +#define __GET_LOW(__TYPE, a) ((__TYPE)(((v2u64)(a))[0])) +#define __GET_HIGH(__TYPE, a) ((__TYPE)(((v2u64)(a))[1])) +#endif + +/* v8i8 msa_get_low_s8(v16i8 __a) */ +#define msa_get_low_s8(__a) __GET_LOW(v8i8, __a) + +/* v4i16 msa_get_low_s16(v8i16 __a) */ +#define msa_get_low_s16(__a) __GET_LOW(v4i16, __a) + +/* v2i32 msa_get_low_s32(v4i32 __a) */ +#define msa_get_low_s32(__a) __GET_LOW(v2i32, __a) + +/* v1i64 msa_get_low_s64(v2i64 __a) */ +#define msa_get_low_s64(__a) __GET_LOW(v1i64, __a) + +/* v8u8 msa_get_low_u8(v16u8 __a) */ +#define msa_get_low_u8(__a) __GET_LOW(v8u8, __a) + +/* v4u16 msa_get_low_u16(v8u16 __a) */ +#define msa_get_low_u16(__a) __GET_LOW(v4u16, __a) + +/* v2u32 msa_get_low_u32(v4u32 __a) */ +#define msa_get_low_u32(__a) __GET_LOW(v2u32, __a) + +/* v1u64 msa_get_low_u64(v2u64 __a) */ +#define msa_get_low_u64(__a) __GET_LOW(v1u64, __a) + +/* v2f32 msa_get_low_f32(v4f32 __a) */ +#define msa_get_low_f32(__a) __GET_LOW(v2f32, __a) + +/* v1f64 msa_get_low_f64(v2f64 __a) */ +#define msa_get_low_f64(__a) __GET_LOW(v1f64, __a) + +/* v8i8 msa_get_high_s8(v16i8 __a) */ +#define msa_get_high_s8(__a) __GET_HIGH(v8i8, __a) + +/* v4i16 msa_get_high_s16(v8i16 __a) */ +#define msa_get_high_s16(__a) __GET_HIGH(v4i16, __a) + +/* v2i32 msa_get_high_s32(v4i32 __a) */ +#define msa_get_high_s32(__a) __GET_HIGH(v2i32, __a) + +/* v1i64 msa_get_high_s64(v2i64 __a) */ +#define msa_get_high_s64(__a) __GET_HIGH(v1i64, __a) + +/* v8u8 msa_get_high_u8(v16u8 __a) */ +#define msa_get_high_u8(__a) __GET_HIGH(v8u8, __a) + +/* v4u16 msa_get_high_u16(v8u16 __a) */ +#define msa_get_high_u16(__a) __GET_HIGH(v4u16, __a) + +/* v2u32 msa_get_high_u32(v4u32 __a) */ +#define msa_get_high_u32(__a) __GET_HIGH(v2u32, __a) + +/* v1u64 msa_get_high_u64(v2u64 __a) */ +#define msa_get_high_u64(__a) __GET_HIGH(v1u64, __a) + +/* v2f32 msa_get_high_f32(v4f32 __a) */ +#define msa_get_high_f32(__a) __GET_HIGH(v2f32, __a) + +/* v1f64 msa_get_high_f64(v2f64 __a) */ +#define msa_get_high_f64(__a) __GET_HIGH(v1f64, __a) + +/* ri = ai * b[lane] */ +/* v4f32 msa_mulq_lane_f32(v4f32 __a, v4f32 __b, const int __lane) */ +#define msa_mulq_lane_f32(__a, __b, __lane) ((__a) * msa_getq_lane_f32(__b, __lane)) + +/* ri = ai + bi * c[lane] */ +/* v4f32 msa_mlaq_lane_f32(v4f32 __a, v4f32 __b, v4f32 __c, const int __lane) */ +#define msa_mlaq_lane_f32(__a, __b, __c, __lane) ((__a) + ((__b) * msa_getq_lane_f32(__c, __lane))) + +/* uint16_t msa_sum_u16(v8u16 __a)*/ +#define msa_sum_u16(__a) \ +({ \ + v4u32 _b; \ + v2u64 _c; \ + _b = __builtin_msa_hadd_u_w(__a, __a); \ + _c = __builtin_msa_hadd_u_d(_b, _b); \ + (uint16_t)(_c[0] + _c[1]); \ +}) + +/* int16_t msa_sum_s16(v8i16 __a) */ +#define msa_sum_s16(__a) \ +({ \ + v4i32 _b; \ + v2i64 _c; \ + _b = __builtin_msa_hadd_s_w(__a, __a); \ + _c = __builtin_msa_hadd_s_d(_b, _b); \ + (int16_t)(_c[0] + _c[1]); \ +}) + + +/* uint32_t msa_sum_u32(v4u32 __a)*/ +#define msa_sum_u32(__a) \ +({ \ + v2u64 _b; \ + _b = __builtin_msa_hadd_u_d(__a, __a); \ + (uint32_t)(_b[0] + _b[1]); \ +}) + +/* int32_t msa_sum_s32(v4i32 __a)*/ +#define msa_sum_s32(__a) \ +({ \ + v2i64 _b; \ + _b = __builtin_msa_hadd_s_d(__a, __a); \ + (int32_t)(_b[0] + _b[1]); \ +}) + +/* uint8_t msa_sum_u8(v16u8 __a)*/ +#define msa_sum_u8(__a) \ +({ \ + v8u16 _b16; \ + v4u32 _c32; \ + _b16 = __builtin_msa_hadd_u_h(__a, __a); \ + _c32 = __builtin_msa_hadd_u_w(_b16, _b16); \ + (uint8_t)msa_sum_u32(_c32); \ +}) + +/* int8_t msa_sum_s8(v16s8 __a)*/ +#define msa_sum_s8(__a) \ +({ \ + v8i16 _b16; \ + v4i32 _c32; \ + _b16 = __builtin_msa_hadd_s_h(__a, __a); \ + _c32 = __builtin_msa_hadd_s_w(_b16, _b16); \ + (int8_t)msa_sum_s32(_c32); \ +}) + +/* float msa_sum_f32(v4f32 __a)*/ +#define msa_sum_f32(__a) ((__a)[0] + (__a)[1] + (__a)[2] + (__a)[3]) + +/* v8u16 msa_paddlq_u8(v16u8 __a) */ +#define msa_paddlq_u8(__a) (__builtin_msa_hadd_u_h(__a, __a)) + +/* v8i16 msa_paddlq_s8(v16i8 __a) */ +#define msa_paddlq_s8(__a) (__builtin_msa_hadd_s_h(__a, __a)) + +/* v4u32 msa_paddlq_u16 (v8u16 __a)*/ +#define msa_paddlq_u16(__a) (__builtin_msa_hadd_u_w(__a, __a)) + +/* v4i32 msa_paddlq_s16 (v8i16 __a)*/ +#define msa_paddlq_s16(__a) (__builtin_msa_hadd_s_w(__a, __a)) + +/* v2u64 msa_paddlq_u32(v4u32 __a) */ +#define msa_paddlq_u32(__a) (__builtin_msa_hadd_u_d(__a, __a)) + +/* v2i64 msa_paddlq_s32(v4i32 __a) */ +#define msa_paddlq_s32(__a) (__builtin_msa_hadd_s_d(__a, __a)) + +#define V8U8_2_V8U16(x) {(uint16_t)x[0], (uint16_t)x[1], (uint16_t)x[2], (uint16_t)x[3], \ + (uint16_t)x[4], (uint16_t)x[5], (uint16_t)x[6], (uint16_t)x[7]} +#define V8U8_2_V8I16(x) {(int16_t)x[0], (int16_t)x[1], (int16_t)x[2], (int16_t)x[3], \ + (int16_t)x[4], (int16_t)x[5], (int16_t)x[6], (int16_t)x[7]} +#define V8I8_2_V8I16(x) {(int16_t)x[0], (int16_t)x[1], (int16_t)x[2], (int16_t)x[3], \ + (int16_t)x[4], (int16_t)x[5], (int16_t)x[6], (int16_t)x[7]} +#define V4U16_2_V4U32(x) {(uint32_t)x[0], (uint32_t)x[1], (uint32_t)x[2], (uint32_t)x[3]} +#define V4U16_2_V4I32(x) {(int32_t)x[0], (int32_t)x[1], (int32_t)x[2], (int32_t)x[3]} +#define V4I16_2_V4I32(x) {(int32_t)x[0], (int32_t)x[1], (int32_t)x[2], (int32_t)x[3]} +#define V2U32_2_V2U64(x) {(uint64_t)x[0], (uint64_t)x[1]} +#define V2U32_2_V2I64(x) {(int64_t)x[0], (int64_t)x[1]} + +/* v8u16 msa_mull_u8(v8u8 __a, v8u8 __b) */ +#define msa_mull_u8(__a, __b) ((v8u16)__builtin_msa_mulv_h((v8i16)V8U8_2_V8I16(__a), (v8i16)V8U8_2_V8I16(__b))) + +/* v8i16 msa_mull_s8(v8i8 __a, v8i8 __b)*/ +#define msa_mull_s8(__a, __b) (__builtin_msa_mulv_h((v8i16)V8I8_2_V8I16(__a), (v8i16)V8I8_2_V8I16(__b))) + +/* v4u32 msa_mull_u16(v4u16 __a, v4u16 __b) */ +#define msa_mull_u16(__a, __b) ((v4u32)__builtin_msa_mulv_w((v4i32)V4U16_2_V4I32(__a), (v4i32)V4U16_2_V4I32(__b))) + +/* v4i32 msa_mull_s16(v4i16 __a, v4i16 __b) */ +#define msa_mull_s16(__a, __b) (__builtin_msa_mulv_w((v4i32)V4I16_2_V4I32(__a), (v4i32)V4I16_2_V4I32(__b))) + +/* v2u64 msa_mull_u32(v2u32 __a, v2u32 __b) */ +#define msa_mull_u32(__a, __b) ((v2u64)__builtin_msa_mulv_d((v2i64)V2U32_2_V2I64(__a), (v2i64)V2U32_2_V2I64(__b))) + +/* bitwise and: __builtin_msa_and_v */ +#define msa_andq_u8(__a, __b) ((v16u8)__builtin_msa_and_v((v16u8)(__a), (v16u8)(__b))) +#define msa_andq_s8(__a, __b) ((v16i8)__builtin_msa_and_v((v16u8)(__a), (v16u8)(__b))) +#define msa_andq_u16(__a, __b) ((v8u16)__builtin_msa_and_v((v16u8)(__a), (v16u8)(__b))) +#define msa_andq_s16(__a, __b) ((v8i16)__builtin_msa_and_v((v16u8)(__a), (v16u8)(__b))) +#define msa_andq_u32(__a, __b) ((v4u32)__builtin_msa_and_v((v16u8)(__a), (v16u8)(__b))) +#define msa_andq_s32(__a, __b) ((v4i32)__builtin_msa_and_v((v16u8)(__a), (v16u8)(__b))) +#define msa_andq_u64(__a, __b) ((v2u64)__builtin_msa_and_v((v16u8)(__a), (v16u8)(__b))) +#define msa_andq_s64(__a, __b) ((v2i64)__builtin_msa_and_v((v16u8)(__a), (v16u8)(__b))) + +/* bitwise or: __builtin_msa_or_v */ +#define msa_orrq_u8(__a, __b) ((v16u8)__builtin_msa_or_v((v16u8)(__a), (v16u8)(__b))) +#define msa_orrq_s8(__a, __b) ((v16i8)__builtin_msa_or_v((v16u8)(__a), (v16u8)(__b))) +#define msa_orrq_u16(__a, __b) ((v8u16)__builtin_msa_or_v((v16u8)(__a), (v16u8)(__b))) +#define msa_orrq_s16(__a, __b) ((v8i16)__builtin_msa_or_v((v16u8)(__a), (v16u8)(__b))) +#define msa_orrq_u32(__a, __b) ((v4u32)__builtin_msa_or_v((v16u8)(__a), (v16u8)(__b))) +#define msa_orrq_s32(__a, __b) ((v4i32)__builtin_msa_or_v((v16u8)(__a), (v16u8)(__b))) +#define msa_orrq_u64(__a, __b) ((v2u64)__builtin_msa_or_v((v16u8)(__a), (v16u8)(__b))) +#define msa_orrq_s64(__a, __b) ((v2i64)__builtin_msa_or_v((v16u8)(__a), (v16u8)(__b))) + +/* bitwise xor: __builtin_msa_xor_v */ +#define msa_eorq_u8(__a, __b) ((v16u8)__builtin_msa_xor_v((v16u8)(__a), (v16u8)(__b))) +#define msa_eorq_s8(__a, __b) ((v16i8)__builtin_msa_xor_v((v16u8)(__a), (v16u8)(__b))) +#define msa_eorq_u16(__a, __b) ((v8u16)__builtin_msa_xor_v((v16u8)(__a), (v16u8)(__b))) +#define msa_eorq_s16(__a, __b) ((v8i16)__builtin_msa_xor_v((v16u8)(__a), (v16u8)(__b))) +#define msa_eorq_u32(__a, __b) ((v4u32)__builtin_msa_xor_v((v16u8)(__a), (v16u8)(__b))) +#define msa_eorq_s32(__a, __b) ((v4i32)__builtin_msa_xor_v((v16u8)(__a), (v16u8)(__b))) +#define msa_eorq_u64(__a, __b) ((v2u64)__builtin_msa_xor_v((v16u8)(__a), (v16u8)(__b))) +#define msa_eorq_s64(__a, __b) ((v2i64)__builtin_msa_xor_v((v16u8)(__a), (v16u8)(__b))) + +/* bitwise not: v16u8 __builtin_msa_xori_b (v16u8, 0xff) */ +#define msa_mvnq_u8(__a) ((v16u8)__builtin_msa_xori_b((v16u8)(__a), 0xFF)) +#define msa_mvnq_s8(__a) ((v16i8)__builtin_msa_xori_b((v16u8)(__a), 0xFF)) +#define msa_mvnq_u16(__a) ((v8u16)__builtin_msa_xori_b((v16u8)(__a), 0xFF)) +#define msa_mvnq_s16(__a) ((v8i16)__builtin_msa_xori_b((v16u8)(__a), 0xFF)) +#define msa_mvnq_u32(__a) ((v4u32)__builtin_msa_xori_b((v16u8)(__a), 0xFF)) +#define msa_mvnq_s32(__a) ((v4i32)__builtin_msa_xori_b((v16u8)(__a), 0xFF)) +#define msa_mvnq_u64(__a) ((v2u64)__builtin_msa_xori_b((v16u8)(__a), 0xFF)) +#define msa_mvnq_s64(__a) ((v2i64)__builtin_msa_xori_b((v16u8)(__a), 0xFF)) + +/* compare equal: ceq -> ri = ai == bi ? 1...1:0...0 */ +#define msa_ceqq_u8(__a, __b) ((v16u8)__builtin_msa_ceq_b((v16i8)(__a), (v16i8)(__b))) +#define msa_ceqq_s8(__a, __b) ((v16u8)__builtin_msa_ceq_b((v16i8)(__a), (v16i8)(__b))) +#define msa_ceqq_u16(__a, __b) ((v8u16)__builtin_msa_ceq_h((v8i16)(__a), (v8i16)(__b))) +#define msa_ceqq_s16(__a, __b) ((v8u16)__builtin_msa_ceq_h((v8i16)(__a), (v8i16)(__b))) +#define msa_ceqq_u32(__a, __b) ((v4u32)__builtin_msa_ceq_w((v4i32)(__a), (v4i32)(__b))) +#define msa_ceqq_s32(__a, __b) ((v4u32)__builtin_msa_ceq_w((v4i32)(__a), (v4i32)(__b))) +#define msa_ceqq_f32(__a, __b) ((v4u32)__builtin_msa_fceq_w((v4f32)(__a), (v4f32)(__b))) +#define msa_ceqq_u64(__a, __b) ((v2u64)__builtin_msa_ceq_d((v2i64)(__a), (v2i64)(__b))) +#define msa_ceqq_s64(__a, __b) ((v2u64)__builtin_msa_ceq_d((v2i64)(__a), (v2i64)(__b))) +#define msa_ceqq_f64(__a, __b) ((v2u64)__builtin_msa_fceq_d((v2f64)(__a), (v2f64)(__b))) + +/* Compare less-than: clt -> ri = ai < bi ? 1...1:0...0 */ +#define msa_cltq_u8(__a, __b) ((v16u8)__builtin_msa_clt_u_b((v16u8)(__a), (v16u8)(__b))) +#define msa_cltq_s8(__a, __b) ((v16u8)__builtin_msa_clt_s_b((v16i8)(__a), (v16i8)(__b))) +#define msa_cltq_u16(__a, __b) ((v8u16)__builtin_msa_clt_u_h((v8u16)(__a), (v8u16)(__b))) +#define msa_cltq_s16(__a, __b) ((v8u16)__builtin_msa_clt_s_h((v8i16)(__a), (v8i16)(__b))) +#define msa_cltq_u32(__a, __b) ((v4u32)__builtin_msa_clt_u_w((v4u32)(__a), (v4u32)(__b))) +#define msa_cltq_s32(__a, __b) ((v4u32)__builtin_msa_clt_s_w((v4i32)(__a), (v4i32)(__b))) +#define msa_cltq_f32(__a, __b) ((v4u32)__builtin_msa_fclt_w((v4f32)(__a), (v4f32)(__b))) +#define msa_cltq_u64(__a, __b) ((v2u64)__builtin_msa_clt_u_d((v2u64)(__a), (v2u64)(__b))) +#define msa_cltq_s64(__a, __b) ((v2u64)__builtin_msa_clt_s_d((v2i64)(__a), (v2i64)(__b))) +#define msa_cltq_f64(__a, __b) ((v2u64)__builtin_msa_fclt_d((v2f64)(__a), (v2f64)(__b))) + +/* compare greater-than: cgt -> ri = ai > bi ? 1...1:0...0 */ +#define msa_cgtq_u8(__a, __b) ((v16u8)__builtin_msa_clt_u_b((v16u8)(__b), (v16u8)(__a))) +#define msa_cgtq_s8(__a, __b) ((v16u8)__builtin_msa_clt_s_b((v16i8)(__b), (v16i8)(__a))) +#define msa_cgtq_u16(__a, __b) ((v8u16)__builtin_msa_clt_u_h((v8u16)(__b), (v8u16)(__a))) +#define msa_cgtq_s16(__a, __b) ((v8u16)__builtin_msa_clt_s_h((v8i16)(__b), (v8i16)(__a))) +#define msa_cgtq_u32(__a, __b) ((v4u32)__builtin_msa_clt_u_w((v4u32)(__b), (v4u32)(__a))) +#define msa_cgtq_s32(__a, __b) ((v4u32)__builtin_msa_clt_s_w((v4i32)(__b), (v4i32)(__a))) +#define msa_cgtq_f32(__a, __b) ((v4u32)__builtin_msa_fclt_w((v4f32)(__b), (v4f32)(__a))) +#define msa_cgtq_u64(__a, __b) ((v2u64)__builtin_msa_clt_u_d((v2u64)(__b), (v2u64)(__a))) +#define msa_cgtq_s64(__a, __b) ((v2u64)__builtin_msa_clt_s_d((v2i64)(__b), (v2i64)(__a))) +#define msa_cgtq_f64(__a, __b) ((v2u64)__builtin_msa_fclt_d((v2f64)(__b), (v2f64)(__a))) + +/* compare less-equal: cle -> ri = ai <= bi ? 1...1:0...0 */ +#define msa_cleq_u8(__a, __b) ((v16u8)__builtin_msa_cle_u_b((v16u8)(__a), (v16u8)(__b))) +#define msa_cleq_s8(__a, __b) ((v16u8)__builtin_msa_cle_s_b((v16i8)(__a), (v16i8)(__b))) +#define msa_cleq_u16(__a, __b) ((v8u16)__builtin_msa_cle_u_h((v8u16)(__a), (v8u16)(__b))) +#define msa_cleq_s16(__a, __b) ((v8u16)__builtin_msa_cle_s_h((v8i16)(__a), (v8i16)(__b))) +#define msa_cleq_u32(__a, __b) ((v4u32)__builtin_msa_cle_u_w((v4u32)(__a), (v4u32)(__b))) +#define msa_cleq_s32(__a, __b) ((v4u32)__builtin_msa_cle_s_w((v4i32)(__a), (v4i32)(__b))) +#define msa_cleq_f32(__a, __b) ((v4u32)__builtin_msa_fcle_w((v4f32)(__a), (v4f32)(__b))) +#define msa_cleq_u64(__a, __b) ((v2u64)__builtin_msa_cle_u_d((v2u64)(__a), (v2u64)(__b))) +#define msa_cleq_s64(__a, __b) ((v2u64)__builtin_msa_cle_s_d((v2i64)(__a), (v2i64)(__b))) +#define msa_cleq_f64(__a, __b) ((v2u64)__builtin_msa_fcle_d((v2f64)(__a), (v2f64)(__b))) + +/* compare greater-equal: cge -> ri = ai >= bi ? 1...1:0...0 */ +#define msa_cgeq_u8(__a, __b) ((v16u8)__builtin_msa_cle_u_b((v16u8)(__b), (v16u8)(__a))) +#define msa_cgeq_s8(__a, __b) ((v16u8)__builtin_msa_cle_s_b((v16i8)(__b), (v16i8)(__a))) +#define msa_cgeq_u16(__a, __b) ((v8u16)__builtin_msa_cle_u_h((v8u16)(__b), (v8u16)(__a))) +#define msa_cgeq_s16(__a, __b) ((v8u16)__builtin_msa_cle_s_h((v8i16)(__b), (v8i16)(__a))) +#define msa_cgeq_u32(__a, __b) ((v4u32)__builtin_msa_cle_u_w((v4u32)(__b), (v4u32)(__a))) +#define msa_cgeq_s32(__a, __b) ((v4u32)__builtin_msa_cle_s_w((v4i32)(__b), (v4i32)(__a))) +#define msa_cgeq_f32(__a, __b) ((v4u32)__builtin_msa_fcle_w((v4f32)(__b), (v4f32)(__a))) +#define msa_cgeq_u64(__a, __b) ((v2u64)__builtin_msa_cle_u_d((v2u64)(__b), (v2u64)(__a))) +#define msa_cgeq_s64(__a, __b) ((v2u64)__builtin_msa_cle_s_d((v2i64)(__b), (v2i64)(__a))) +#define msa_cgeq_f64(__a, __b) ((v2u64)__builtin_msa_fcle_d((v2f64)(__b), (v2f64)(__a))) + +/* Shift Left Logical: shl -> ri = ai << bi; */ +#define msa_shlq_u8(__a, __b) ((v16u8)__builtin_msa_sll_b((v16i8)(__a), (v16i8)(__b))) +#define msa_shlq_s8(__a, __b) ((v16i8)__builtin_msa_sll_b((v16i8)(__a), (v16i8)(__b))) +#define msa_shlq_u16(__a, __b) ((v8u16)__builtin_msa_sll_h((v8i16)(__a), (v8i16)(__b))) +#define msa_shlq_s16(__a, __b) ((v8i16)__builtin_msa_sll_h((v8i16)(__a), (v8i16)(__b))) +#define msa_shlq_u32(__a, __b) ((v4u32)__builtin_msa_sll_w((v4i32)(__a), (v4i32)(__b))) +#define msa_shlq_s32(__a, __b) ((v4i32)__builtin_msa_sll_w((v4i32)(__a), (v4i32)(__b))) +#define msa_shlq_u64(__a, __b) ((v2u64)__builtin_msa_sll_d((v2i64)(__a), (v2i64)(__b))) +#define msa_shlq_s64(__a, __b) ((v2i64)__builtin_msa_sll_d((v2i64)(__a), (v2i64)(__b))) + +/* Immediate Shift Left Logical: shl -> ri = ai << imm; */ +#define msa_shlq_n_u8(__a, __imm) ((v16u8)__builtin_msa_slli_b((v16i8)(__a), __imm)) +#define msa_shlq_n_s8(__a, __imm) ((v16i8)__builtin_msa_slli_b((v16i8)(__a), __imm)) +#define msa_shlq_n_u16(__a, __imm) ((v8u16)__builtin_msa_slli_h((v8i16)(__a), __imm)) +#define msa_shlq_n_s16(__a, __imm) ((v8i16)__builtin_msa_slli_h((v8i16)(__a), __imm)) +#define msa_shlq_n_u32(__a, __imm) ((v4u32)__builtin_msa_slli_w((v4i32)(__a), __imm)) +#define msa_shlq_n_s32(__a, __imm) ((v4i32)__builtin_msa_slli_w((v4i32)(__a), __imm)) +#define msa_shlq_n_u64(__a, __imm) ((v2u64)__builtin_msa_slli_d((v2i64)(__a), __imm)) +#define msa_shlq_n_s64(__a, __imm) ((v2i64)__builtin_msa_slli_d((v2i64)(__a), __imm)) + +/* shift right: shrq -> ri = ai >> bi; */ +#define msa_shrq_u8(__a, __b) ((v16u8)__builtin_msa_srl_b((v16i8)(__a), (v16i8)(__b))) +#define msa_shrq_s8(__a, __b) ((v16i8)__builtin_msa_sra_b((v16i8)(__a), (v16i8)(__b))) +#define msa_shrq_u16(__a, __b) ((v8u16)__builtin_msa_srl_h((v8i16)(__a), (v8i16)(__b))) +#define msa_shrq_s16(__a, __b) ((v8i16)__builtin_msa_sra_h((v8i16)(__a), (v8i16)(__b))) +#define msa_shrq_u32(__a, __b) ((v4u32)__builtin_msa_srl_w((v4i32)(__a), (v4i32)(__b))) +#define msa_shrq_s32(__a, __b) ((v4i32)__builtin_msa_sra_w((v4i32)(__a), (v4i32)(__b))) +#define msa_shrq_u64(__a, __b) ((v2u64)__builtin_msa_srl_d((v2i64)(__a), (v2i64)(__b))) +#define msa_shrq_s64(__a, __b) ((v2i64)__builtin_msa_sra_d((v2i64)(__a), (v2i64)(__b))) + +/* Immediate Shift Right: shr -> ri = ai >> imm; */ +#define msa_shrq_n_u8(__a, __imm) ((v16u8)__builtin_msa_srli_b((v16i8)(__a), __imm)) +#define msa_shrq_n_s8(__a, __imm) ((v16i8)__builtin_msa_srai_b((v16i8)(__a), __imm)) +#define msa_shrq_n_u16(__a, __imm) ((v8u16)__builtin_msa_srli_h((v8i16)(__a), __imm)) +#define msa_shrq_n_s16(__a, __imm) ((v8i16)__builtin_msa_srai_h((v8i16)(__a), __imm)) +#define msa_shrq_n_u32(__a, __imm) ((v4u32)__builtin_msa_srli_w((v4i32)(__a), __imm)) +#define msa_shrq_n_s32(__a, __imm) ((v4i32)__builtin_msa_srai_w((v4i32)(__a), __imm)) +#define msa_shrq_n_u64(__a, __imm) ((v2u64)__builtin_msa_srli_d((v2i64)(__a), __imm)) +#define msa_shrq_n_s64(__a, __imm) ((v2i64)__builtin_msa_srai_d((v2i64)(__a), __imm)) + +/* Immediate Shift Right Rounded: shr -> ri = ai >> (rounded)imm; */ +#define msa_rshrq_n_u8(__a, __imm) ((v16u8)__builtin_msa_srlri_b((v16i8)(__a), __imm)) +#define msa_rshrq_n_s8(__a, __imm) ((v16i8)__builtin_msa_srari_b((v16i8)(__a), __imm)) +#define msa_rshrq_n_u16(__a, __imm) ((v8u16)__builtin_msa_srlri_h((v8i16)(__a), __imm)) +#define msa_rshrq_n_s16(__a, __imm) ((v8i16)__builtin_msa_srari_h((v8i16)(__a), __imm)) +#define msa_rshrq_n_u32(__a, __imm) ((v4u32)__builtin_msa_srlri_w((v4i32)(__a), __imm)) +#define msa_rshrq_n_s32(__a, __imm) ((v4i32)__builtin_msa_srari_w((v4i32)(__a), __imm)) +#define msa_rshrq_n_u64(__a, __imm) ((v2u64)__builtin_msa_srlri_d((v2i64)(__a), __imm)) +#define msa_rshrq_n_s64(__a, __imm) ((v2i64)__builtin_msa_srari_d((v2i64)(__a), __imm)) + +/* Vector saturating rounding shift left, qrshl -> ri = ai << bi; */ +#define msa_qrshrq_s32(a, b) ((v4i32)__msa_srar_w((v4i32)(a), (v4i32)(b))) + +/* Rename the msa builtin func to unify the name style for intrin_msa.hpp */ +#define msa_qaddq_u8 __builtin_msa_adds_u_b +#define msa_qaddq_s8 __builtin_msa_adds_s_b +#define msa_qaddq_u16 __builtin_msa_adds_u_h +#define msa_qaddq_s16 __builtin_msa_adds_s_h +#define msa_qaddq_u32 __builtin_msa_adds_u_w +#define msa_qaddq_s32 __builtin_msa_adds_s_w +#define msa_qaddq_u64 __builtin_msa_adds_u_d +#define msa_qaddq_s64 __builtin_msa_adds_s_d +#define msa_addq_u8(a, b) ((v16u8)__builtin_msa_addv_b((v16i8)(a), (v16i8)(b))) +#define msa_addq_s8 __builtin_msa_addv_b +#define msa_addq_u16(a, b) ((v8u16)__builtin_msa_addv_h((v8i16)(a), (v8i16)(b))) +#define msa_addq_s16 __builtin_msa_addv_h +#define msa_addq_u32(a, b) ((v4u32)__builtin_msa_addv_w((v4i32)(a), (v4i32)(b))) +#define msa_addq_s32 __builtin_msa_addv_w +#define msa_addq_f32 __builtin_msa_fadd_w +#define msa_addq_u64(a, b) ((v2u64)__builtin_msa_addv_d((v2i64)(a), (v2i64)(b))) +#define msa_addq_s64 __builtin_msa_addv_d +#define msa_addq_f64 __builtin_msa_fadd_d +#define msa_qsubq_u8 __builtin_msa_subs_u_b +#define msa_qsubq_s8 __builtin_msa_subs_s_b +#define msa_qsubq_u16 __builtin_msa_subs_u_h +#define msa_qsubq_s16 __builtin_msa_subs_s_h +#define msa_subq_u8(a, b) ((v16u8)__builtin_msa_subv_b((v16i8)(a), (v16i8)(b))) +#define msa_subq_s8 __builtin_msa_subv_b +#define msa_subq_u16(a, b) ((v8u16)__builtin_msa_subv_h((v8i16)(a), (v8i16)(b))) +#define msa_subq_s16 __builtin_msa_subv_h +#define msa_subq_u32(a, b) ((v4u32)__builtin_msa_subv_w((v4i32)(a), (v4i32)(b))) +#define msa_subq_s32 __builtin_msa_subv_w +#define msa_subq_f32 __builtin_msa_fsub_w +#define msa_subq_u64(a, b) ((v2u64)__builtin_msa_subv_d((v2i64)(a), (v2i64)(b))) +#define msa_subq_s64 __builtin_msa_subv_d +#define msa_subq_f64 __builtin_msa_fsub_d +#define msa_mulq_u8(a, b) ((v16u8)__builtin_msa_mulv_b((v16i8)(a), (v16i8)(b))) +#define msa_mulq_s8(a, b) ((v16i8)__builtin_msa_mulv_b((v16i8)(a), (v16i8)(b))) +#define msa_mulq_u16(a, b) ((v8u16)__builtin_msa_mulv_h((v8i16)(a), (v8i16)(b))) +#define msa_mulq_s16(a, b) ((v8i16)__builtin_msa_mulv_h((v8i16)(a), (v8i16)(b))) +#define msa_mulq_u32(a, b) ((v4u32)__builtin_msa_mulv_w((v4i32)(a), (v4i32)(b))) +#define msa_mulq_s32(a, b) ((v4i32)__builtin_msa_mulv_w((v4i32)(a), (v4i32)(b))) +#define msa_mulq_u64(a, b) ((v2u64)__builtin_msa_mulv_d((v2i64)(a), (v2i64)(b))) +#define msa_mulq_s64(a, b) ((v2i64)__builtin_msa_mulv_d((v2i64)(a), (v2i64)(b))) +#define msa_mulq_f32 __builtin_msa_fmul_w +#define msa_mulq_f64 __builtin_msa_fmul_d +#define msa_divq_f32 __builtin_msa_fdiv_w +#define msa_divq_f64 __builtin_msa_fdiv_d +#define msa_dotp_s_h __builtin_msa_dotp_s_h +#define msa_dotp_s_w __builtin_msa_dotp_s_w +#define msa_dotp_s_d __builtin_msa_dotp_s_d +#define msa_dotp_u_h __builtin_msa_dotp_u_h +#define msa_dotp_u_w __builtin_msa_dotp_u_w +#define msa_dotp_u_d __builtin_msa_dotp_u_d +#define msa_dpadd_s_h __builtin_msa_dpadd_s_h +#define msa_dpadd_s_w __builtin_msa_dpadd_s_w +#define msa_dpadd_s_d __builtin_msa_dpadd_s_d +#define msa_dpadd_u_h __builtin_msa_dpadd_u_h +#define msa_dpadd_u_w __builtin_msa_dpadd_u_w +#define msa_dpadd_u_d __builtin_msa_dpadd_u_d + +#define ILVRL_B2(RTYPE, in0, in1, low, hi) do { \ + low = (RTYPE)__builtin_msa_ilvr_b((v16i8)(in0), (v16i8)(in1)); \ + hi = (RTYPE)__builtin_msa_ilvl_b((v16i8)(in0), (v16i8)(in1)); \ + } while (0) +#define ILVRL_B2_UB(...) ILVRL_B2(v16u8, __VA_ARGS__) +#define ILVRL_B2_SB(...) ILVRL_B2(v16i8, __VA_ARGS__) +#define ILVRL_B2_UH(...) ILVRL_B2(v8u16, __VA_ARGS__) +#define ILVRL_B2_SH(...) ILVRL_B2(v8i16, __VA_ARGS__) +#define ILVRL_B2_SW(...) ILVRL_B2(v4i32, __VA_ARGS__) + +#define ILVRL_H2(RTYPE, in0, in1, low, hi) do { \ + low = (RTYPE)__builtin_msa_ilvr_h((v8i16)(in0), (v8i16)(in1)); \ + hi = (RTYPE)__builtin_msa_ilvl_h((v8i16)(in0), (v8i16)(in1)); \ + } while (0) +#define ILVRL_H2_UB(...) ILVRL_H2(v16u8, __VA_ARGS__) +#define ILVRL_H2_SB(...) ILVRL_H2(v16i8, __VA_ARGS__) +#define ILVRL_H2_UH(...) ILVRL_H2(v8u16, __VA_ARGS__) +#define ILVRL_H2_SH(...) ILVRL_H2(v8i16, __VA_ARGS__) +#define ILVRL_H2_SW(...) ILVRL_H2(v4i32, __VA_ARGS__) +#define ILVRL_H2_UW(...) ILVRL_H2(v4u32, __VA_ARGS__) + +#define ILVRL_W2(RTYPE, in0, in1, low, hi) do { \ + low = (RTYPE)__builtin_msa_ilvr_w((v4i32)(in0), (v4i32)(in1)); \ + hi = (RTYPE)__builtin_msa_ilvl_w((v4i32)(in0), (v4i32)(in1)); \ + } while (0) +#define ILVRL_W2_UB(...) ILVRL_W2(v16u8, __VA_ARGS__) +#define ILVRL_W2_SH(...) ILVRL_W2(v8i16, __VA_ARGS__) +#define ILVRL_W2_SW(...) ILVRL_W2(v4i32, __VA_ARGS__) +#define ILVRL_W2_UW(...) ILVRL_W2(v4u32, __VA_ARGS__) + +/* absq, qabsq (r = |a|;) */ +#define msa_absq_s8(a) __builtin_msa_add_a_b(a, __builtin_msa_fill_b(0)) +#define msa_absq_s16(a) __builtin_msa_add_a_h(a, __builtin_msa_fill_h(0)) +#define msa_absq_s32(a) __builtin_msa_add_a_w(a, __builtin_msa_fill_w(0)) +#define msa_absq_s64(a) __builtin_msa_add_a_d(a, __builtin_msa_fill_d(0)) +#define msa_absq_f32(a) ((v4f32)__builtin_msa_bclri_w((v4u32)(a), 31)) +#define msa_absq_f64(a) ((v2f64)__builtin_msa_bclri_d((v2u64)(a), 63)) +#define msa_qabsq_s8(a) __builtin_msa_adds_a_b(a, __builtin_msa_fill_b(0)) +#define msa_qabsq_s16(a) __builtin_msa_adds_a_h(a, __builtin_msa_fill_h(0)) +#define msa_qabsq_s32(a) __builtin_msa_adds_a_w(a, __builtin_msa_fill_w(0)) +#define msa_qabsq_s64(a) __builtin_msa_adds_a_d(a, __builtin_msa_fill_d(0)) + +/* abdq, qabdq (r = |a - b|;) */ +#define msa_abdq_u8 __builtin_msa_asub_u_b +#define msa_abdq_s8 __builtin_msa_asub_s_b +#define msa_abdq_u16 __builtin_msa_asub_u_h +#define msa_abdq_s16 __builtin_msa_asub_s_h +#define msa_abdq_u32 __builtin_msa_asub_u_w +#define msa_abdq_s32 __builtin_msa_asub_s_w +#define msa_abdq_u64 __builtin_msa_asub_u_d +#define msa_abdq_s64 __builtin_msa_asub_s_d +#define msa_abdq_f32(a, b) msa_absq_f32(__builtin_msa_fsub_w(a, b)) +#define msa_abdq_f64(a, b) msa_absq_f64(__builtin_msa_fsub_d(a, b)) +#define msa_qabdq_s8(a, b) msa_qabsq_s8(__builtin_msa_subs_s_b(a, b)) +#define msa_qabdq_s16(a, b) msa_qabsq_s16(__builtin_msa_subs_s_h(a, b)) +#define msa_qabdq_s32(a, b) msa_qabsq_s32(__builtin_msa_subs_s_w(a, b)) +#define msa_qabdq_s64(a, b) msa_qabsq_s64(__builtin_msa_subs_s_d(a, b)) + +/* sqrtq, rsqrtq */ +#define msa_sqrtq_f32 __builtin_msa_fsqrt_w +#define msa_sqrtq_f64 __builtin_msa_fsqrt_d +#define msa_rsqrtq_f32 __builtin_msa_frsqrt_w +#define msa_rsqrtq_f64 __builtin_msa_frsqrt_d + + +/* mlaq: r = a + b * c; */ +__extension__ extern __inline v4i32 +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +msa_mlaq_s32(v4i32 __a, v4i32 __b, v4i32 __c) +{ + __asm__ volatile("maddv.w %w[__a], %w[__b], %w[__c]\n" + // Outputs + : [__a] "+f"(__a) + // Inputs + : [__b] "f"(__b), [__c] "f"(__c)); + return __a; +} + +__extension__ extern __inline v2i64 +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +msa_mlaq_s64(v2i64 __a, v2i64 __b, v2i64 __c) +{ + __asm__ volatile("maddv.d %w[__a], %w[__b], %w[__c]\n" + // Outputs + : [__a] "+f"(__a) + // Inputs + : [__b] "f"(__b), [__c] "f"(__c)); + return __a; +} + +__extension__ extern __inline v4f32 +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +msa_mlaq_f32(v4f32 __a, v4f32 __b, v4f32 __c) +{ + __asm__ volatile("fmadd.w %w[__a], %w[__b], %w[__c]\n" + // Outputs + : [__a] "+f"(__a) + // Inputs + : [__b] "f"(__b), [__c] "f"(__c)); + return __a; +} + +__extension__ extern __inline v2f64 +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +msa_mlaq_f64(v2f64 __a, v2f64 __b, v2f64 __c) +{ + __asm__ volatile("fmadd.d %w[__a], %w[__b], %w[__c]\n" + // Outputs + : [__a] "+f"(__a) + // Inputs + : [__b] "f"(__b), [__c] "f"(__c)); + return __a; +} + +/* cntq */ +#define msa_cntq_s8 __builtin_msa_pcnt_b +#define msa_cntq_s16 __builtin_msa_pcnt_h +#define msa_cntq_s32 __builtin_msa_pcnt_w +#define msa_cntq_s64 __builtin_msa_pcnt_d + +/* bslq (a: mask; r = b(if a == 0); r = c(if a == 1);) */ +#define msa_bslq_u8 __builtin_msa_bsel_v + +/* ilvrq, ilvlq (For EL only, ilvrq: b0, a0, b1, a1; ilvlq: b2, a2, b3, a3;) */ +#define msa_ilvrq_s8 __builtin_msa_ilvr_b +#define msa_ilvrq_s16 __builtin_msa_ilvr_h +#define msa_ilvrq_s32 __builtin_msa_ilvr_w +#define msa_ilvrq_s64 __builtin_msa_ilvr_d +#define msa_ilvlq_s8 __builtin_msa_ilvl_b +#define msa_ilvlq_s16 __builtin_msa_ilvl_h +#define msa_ilvlq_s32 __builtin_msa_ilvl_w +#define msa_ilvlq_s64 __builtin_msa_ilvl_d + +/* ilvevq, ilvodq (ilvevq: b0, a0, b2, a2; ilvodq: b1, a1, b3, a3; ) */ +#define msa_ilvevq_s8 __builtin_msa_ilvev_b +#define msa_ilvevq_s16 __builtin_msa_ilvev_h +#define msa_ilvevq_s32 __builtin_msa_ilvev_w +#define msa_ilvevq_s64 __builtin_msa_ilvev_d +#define msa_ilvodq_s8 __builtin_msa_ilvod_b +#define msa_ilvodq_s16 __builtin_msa_ilvod_h +#define msa_ilvodq_s32 __builtin_msa_ilvod_w +#define msa_ilvodq_s64 __builtin_msa_ilvod_d + +/* extq (r = (a || b); a concatenation b and get elements from index c) */ +#ifdef _MIPSEB +#define msa_extq_s8(a, b, c) \ +(__builtin_msa_vshf_b(__builtin_msa_subv_b((v16i8)((v2i64){0x1716151413121110, 0x1F1E1D1C1B1A1918}), __builtin_msa_fill_b(c)), a, b)) +#define msa_extq_s16(a, b, c) \ +(__builtin_msa_vshf_h(__builtin_msa_subv_h((v8i16)((v2i64){0x000B000A00090008, 0x000F000E000D000C}), __builtin_msa_fill_h(c)), a, b)) +#define msa_extq_s32(a, b, c) \ +(__builtin_msa_vshf_w(__builtin_msa_subv_w((v4i32)((v2i64){0x0000000500000004, 0x0000000700000006}), __builtin_msa_fill_w(c)), a, b)) +#define msa_extq_s64(a, b, c) \ +(__builtin_msa_vshf_d(__builtin_msa_subv_d((v2i64){0x0000000000000002, 0x0000000000000003}, __builtin_msa_fill_d(c)), a, b)) +#else +#define msa_extq_s8(a, b, c) \ +(__builtin_msa_vshf_b(__builtin_msa_addv_b((v16i8)((v2i64){0x0706050403020100, 0x0F0E0D0C0B0A0908}), __builtin_msa_fill_b(c)), b, a)) +#define msa_extq_s16(a, b, c) \ +(__builtin_msa_vshf_h(__builtin_msa_addv_h((v8i16)((v2i64){0x0003000200010000, 0x0007000600050004}), __builtin_msa_fill_h(c)), b, a)) +#define msa_extq_s32(a, b, c) \ +(__builtin_msa_vshf_w(__builtin_msa_addv_w((v4i32)((v2i64){0x0000000100000000, 0x0000000300000002}), __builtin_msa_fill_w(c)), b, a)) +#define msa_extq_s64(a, b, c) \ +(__builtin_msa_vshf_d(__builtin_msa_addv_d((v2i64){0x0000000000000000, 0x0000000000000001}, __builtin_msa_fill_d(c)), b, a)) +#endif /* _MIPSEB */ + +/* cvttruncq, cvttintq, cvtrintq */ +#define msa_cvttruncq_u32_f32 __builtin_msa_ftrunc_u_w +#define msa_cvttruncq_s32_f32 __builtin_msa_ftrunc_s_w +#define msa_cvttruncq_u64_f64 __builtin_msa_ftrunc_u_d +#define msa_cvttruncq_s64_f64 __builtin_msa_ftrunc_s_d +#define msa_cvttintq_u32_f32 __builtin_msa_ftint_u_w +#define msa_cvttintq_s32_f32 __builtin_msa_ftint_s_w +#define msa_cvttintq_u64_f64 __builtin_msa_ftint_u_d +#define msa_cvttintq_s64_f64 __builtin_msa_ftint_s_d +#define msa_cvtrintq_f32 __builtin_msa_frint_w +#define msa_cvtrintq_f64 __builtin_msa_frint_d + +/* cvtfintq, cvtfq */ +#define msa_cvtfintq_f32_u32 __builtin_msa_ffint_u_w +#define msa_cvtfintq_f32_s32 __builtin_msa_ffint_s_w +#define msa_cvtfintq_f64_u64 __builtin_msa_ffint_u_d +#define msa_cvtfintq_f64_s64 __builtin_msa_ffint_s_d +#define msa_cvtfq_f32_f64 __builtin_msa_fexdo_w +#define msa_cvtflq_f64_f32 __builtin_msa_fexupr_d +#define msa_cvtfhq_f64_f32 __builtin_msa_fexupl_d + +#define msa_addl_u8(a, b) ((v8u16)__builtin_msa_addv_h((v8i16)V8U8_2_V8I16(a), (v8i16)V8U8_2_V8I16(b))) +#define msa_addl_s8(a, b) (__builtin_msa_addv_h((v8i16)V8I8_2_V8I16(a), (v8i16)V8I8_2_V8I16(b))) +#define msa_addl_u16(a, b) ((v4u32)__builtin_msa_addv_w((v4i32)V4U16_2_V4I32(a), (v4i32)V4U16_2_V4I32(b))) +#define msa_addl_s16(a, b) (__builtin_msa_addv_w((v4i32)V4I16_2_V4I32(a), (v4i32)V4I16_2_V4I32(b))) +#define msa_subl_s16(a, b) (__builtin_msa_subv_w((v4i32)V4I16_2_V4I32(a), (v4i32)V4I16_2_V4I32(b))) +#define msa_recpeq_f32 __builtin_msa_frcp_w +#define msa_recpsq_f32(a, b) (__builtin_msa_fsub_w(msa_dupq_n_f32(2.0f), __builtin_msa_fmul_w(a, b))) + +#define MSA_INTERLEAVED_IMPL_LOAD2_STORE2(_Tp, _Tpv, _Tpvs, suffix, df, nlanes) \ +__extension__ extern __inline void \ +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \ +msa_ld2q_##suffix(const _Tp* ptr, _Tpv* a, _Tpv* b) \ +{ \ + _Tpv v0 = msa_ld1q_##suffix(ptr); \ + _Tpv v1 = msa_ld1q_##suffix(ptr + nlanes); \ + *a = (_Tpv)__builtin_msa_pckev_##df((_Tpvs)v1, (_Tpvs)v0); \ + *b = (_Tpv)__builtin_msa_pckod_##df((_Tpvs)v1, (_Tpvs)v0); \ +} \ +__extension__ extern __inline void \ +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \ +msa_st2q_##suffix(_Tp* ptr, const _Tpv a, const _Tpv b) \ +{ \ + msa_st1q_##suffix(ptr, (_Tpv)__builtin_msa_ilvr_##df((_Tpvs)b, (_Tpvs)a)); \ + msa_st1q_##suffix(ptr + nlanes, (_Tpv)__builtin_msa_ilvl_##df((_Tpvs)b, (_Tpvs)a)); \ +} + +MSA_INTERLEAVED_IMPL_LOAD2_STORE2(uint8_t, v16u8, v16i8, u8, b, 16) +MSA_INTERLEAVED_IMPL_LOAD2_STORE2(int8_t, v16i8, v16i8, s8, b, 16) +MSA_INTERLEAVED_IMPL_LOAD2_STORE2(uint16_t, v8u16, v8i16, u16, h, 8) +MSA_INTERLEAVED_IMPL_LOAD2_STORE2(int16_t, v8i16, v8i16, s16, h, 8) +MSA_INTERLEAVED_IMPL_LOAD2_STORE2(uint32_t, v4u32, v4i32, u32, w, 4) +MSA_INTERLEAVED_IMPL_LOAD2_STORE2(int32_t, v4i32, v4i32, s32, w, 4) +MSA_INTERLEAVED_IMPL_LOAD2_STORE2(float, v4f32, v4i32, f32, w, 4) +MSA_INTERLEAVED_IMPL_LOAD2_STORE2(uint64_t, v2u64, v2i64, u64, d, 2) +MSA_INTERLEAVED_IMPL_LOAD2_STORE2(int64_t, v2i64, v2i64, s64, d, 2) +MSA_INTERLEAVED_IMPL_LOAD2_STORE2(double, v2f64, v2i64, f64, d, 2) + +#ifdef _MIPSEB +#define MSA_INTERLEAVED_IMPL_LOAD3_8(_Tp, _Tpv, _Tpvs, suffix) \ +__extension__ extern __inline void \ +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \ +msa_ld3q_##suffix(const _Tp* ptr, _Tpv* a, _Tpv* b, _Tpv* c) \ +{ \ + _Tpv v0 = msa_ld1q_##suffix(ptr); \ + _Tpv v1 = msa_ld1q_##suffix(ptr + 16); \ + _Tpv v2 = msa_ld1q_##suffix(ptr + 32); \ + _Tpvs v3 = __builtin_msa_vshf_b((_Tpvs)((v2i64){0x0704011F1F1F1F1F, 0x1F1C191613100D0A}), (_Tpvs)v0, (_Tpvs)v1); \ + *a = (_Tpv)__builtin_msa_vshf_b((_Tpvs)((v2i64){0x1716150E0B080502, 0x1F1E1D1C1B1A1918}), v3, (_Tpvs)v2); \ + v3 = __builtin_msa_vshf_b((_Tpvs)((v2i64){0x0603001F1F1F1F1F, 0x1E1B1815120F0C09}), (_Tpvs)v0, (_Tpvs)v1); \ + *b = (_Tpv)__builtin_msa_vshf_b((_Tpvs)((v2i64){0x1716150D0A070401, 0x1F1E1D1C1B1A1918}), v3, (_Tpvs)v2); \ + v3 = __builtin_msa_vshf_b((_Tpvs)((v2i64){0x05021F1F1F1F1F1F, 0x1D1A1714110E0B08}), (_Tpvs)v0, (_Tpvs)v1); \ + *c = (_Tpv)__builtin_msa_vshf_b((_Tpvs)((v2i64){0x17160F0C09060300, 0x1F1E1D1C1B1A1918}), v3, (_Tpvs)v2); \ +} +#else +#define MSA_INTERLEAVED_IMPL_LOAD3_8(_Tp, _Tpv, _Tpvs, suffix) \ +__extension__ extern __inline void \ +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \ +msa_ld3q_##suffix(const _Tp* ptr, _Tpv* a, _Tpv* b, _Tpv* c) \ +{ \ + _Tpv v0 = msa_ld1q_##suffix(ptr); \ + _Tpv v1 = msa_ld1q_##suffix(ptr + 16); \ + _Tpv v2 = msa_ld1q_##suffix(ptr + 32); \ + _Tpvs v3 = __builtin_msa_vshf_b((_Tpvs)((v2i64){0x15120F0C09060300, 0x00000000001E1B18}), (_Tpvs)v1, (_Tpvs)v0); \ + *a = (_Tpv)__builtin_msa_vshf_b((_Tpvs)((v2i64){0x0706050403020100, 0x1D1A1714110A0908}), (_Tpvs)v2, v3); \ + v3 = __builtin_msa_vshf_b((_Tpvs)((v2i64){0x1613100D0A070401, 0x00000000001F1C19}), (_Tpvs)v1, (_Tpvs)v0); \ + *b = (_Tpv)__builtin_msa_vshf_b((_Tpvs)((v2i64){0x0706050403020100, 0x1E1B1815120A0908}), (_Tpvs)v2, v3); \ + v3 = __builtin_msa_vshf_b((_Tpvs)((v2i64){0x1714110E0B080502, 0x0000000000001D1A}), (_Tpvs)v1, (_Tpvs)v0); \ + *c = (_Tpv)__builtin_msa_vshf_b((_Tpvs)((v2i64){0x0706050403020100, 0x1F1C191613100908}), (_Tpvs)v2, v3); \ +} +#endif + +MSA_INTERLEAVED_IMPL_LOAD3_8(uint8_t, v16u8, v16i8, u8) +MSA_INTERLEAVED_IMPL_LOAD3_8(int8_t, v16i8, v16i8, s8) + +#ifdef _MIPSEB +#define MSA_INTERLEAVED_IMPL_LOAD3_16(_Tp, _Tpv, _Tpvs, suffix) \ +__extension__ extern __inline void \ +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \ +msa_ld3q_##suffix(const _Tp* ptr, _Tpv* a, _Tpv* b, _Tpv* c) \ +{ \ + _Tpv v0 = msa_ld1q_##suffix(ptr); \ + _Tpv v1 = msa_ld1q_##suffix(ptr + 8); \ + _Tpv v2 = msa_ld1q_##suffix(ptr + 16); \ + _Tpvs v3 = __builtin_msa_vshf_h((_Tpvs)((v2i64){0x00030000000F000F, 0x000F000C00090006}), (_Tpvs)v1, (_Tpvs)v0); \ + *a = (_Tpv)__builtin_msa_vshf_h((_Tpvs)((v2i64){0x000B000A00050002, 0x000F000E000D000C}), (_Tpvs)v2, v3); \ + v3 = __builtin_msa_vshf_h((_Tpvs)((v2i64){0x0002000F000F000F, 0x000E000B00080005}), (_Tpvs)v1, (_Tpvs)v0); \ + *b = (_Tpv)__builtin_msa_vshf_h((_Tpvs)((v2i64){0x000B000700040001, 0x000F000E000D000C}), (_Tpvs)v2, v3); \ + v3 = __builtin_msa_vshf_h((_Tpvs)((v2i64){0x0001000F000F000F, 0x000D000A00070004}), (_Tpvs)v1, (_Tpvs)v0); \ + *c = (_Tpv)__builtin_msa_vshf_h((_Tpvs)((v2i64){0x000B000600030000, 0x000F000E000D000C}), (_Tpvs)v2, v3); \ +} +#else +#define MSA_INTERLEAVED_IMPL_LOAD3_16(_Tp, _Tpv, _Tpvs, suffix) \ +__extension__ extern __inline void \ +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \ +msa_ld3q_##suffix(const _Tp* ptr, _Tpv* a, _Tpv* b, _Tpv* c) \ +{ \ + _Tpv v0 = msa_ld1q_##suffix(ptr); \ + _Tpv v1 = msa_ld1q_##suffix(ptr + 8); \ + _Tpv v2 = msa_ld1q_##suffix(ptr + 16); \ + _Tpvs v3 = __builtin_msa_vshf_h((_Tpvs)((v2i64){0x0009000600030000, 0x00000000000F000C}), (_Tpvs)v1, (_Tpvs)v0); \ + *a = (_Tpv)__builtin_msa_vshf_h((_Tpvs)((v2i64){0x0003000200010000, 0x000D000A00050004}), (_Tpvs)v2, v3); \ + v3 = __builtin_msa_vshf_h((_Tpvs)((v2i64){0x000A000700040001, 0x000000000000000D}), (_Tpvs)v1, (_Tpvs)v0); \ + *b = (_Tpv)__builtin_msa_vshf_h((_Tpvs)((v2i64){0x0003000200010000, 0x000E000B00080004}), (_Tpvs)v2, v3); \ + v3 = __builtin_msa_vshf_h((_Tpvs)((v2i64){0x000B000800050002, 0x000000000000000E}), (_Tpvs)v1, (_Tpvs)v0); \ + *c = (_Tpv)__builtin_msa_vshf_h((_Tpvs)((v2i64){0x0003000200010000, 0x000F000C00090004}), (_Tpvs)v2, v3); \ +} +#endif + +MSA_INTERLEAVED_IMPL_LOAD3_16(uint16_t, v8u16, v8i16, u16) +MSA_INTERLEAVED_IMPL_LOAD3_16(int16_t, v8i16, v8i16, s16) + +#define MSA_INTERLEAVED_IMPL_LOAD3_32(_Tp, _Tpv, _Tpvs, suffix) \ +__extension__ extern __inline void \ +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \ +msa_ld3q_##suffix(const _Tp* ptr, _Tpv* a, _Tpv* b, _Tpv* c) \ +{ \ + _Tpv v00 = msa_ld1q_##suffix(ptr); \ + _Tpv v01 = msa_ld1q_##suffix(ptr + 4); \ + _Tpv v02 = msa_ld1q_##suffix(ptr + 8); \ + _Tpvs v10 = __builtin_msa_ilvr_w((_Tpvs)__builtin_msa_ilvl_d((v2i64)v01, (v2i64)v01), (_Tpvs)v00); \ + _Tpvs v11 = __builtin_msa_ilvr_w((_Tpvs)v02, (_Tpvs)__builtin_msa_ilvl_d((v2i64)v00, (v2i64)v00)); \ + _Tpvs v12 = __builtin_msa_ilvr_w((_Tpvs)__builtin_msa_ilvl_d((v2i64)v02, (v2i64)v02), (_Tpvs)v01); \ + *a = (_Tpv)__builtin_msa_ilvr_w((_Tpvs)__builtin_msa_ilvl_d((v2i64)v11, (v2i64)v11), v10); \ + *b = (_Tpv)__builtin_msa_ilvr_w(v12, (_Tpvs)__builtin_msa_ilvl_d((v2i64)v10, (v2i64)v10)); \ + *c = (_Tpv)__builtin_msa_ilvr_w((_Tpvs)__builtin_msa_ilvl_d((v2i64)v12, (v2i64)v12), v11); \ +} + +MSA_INTERLEAVED_IMPL_LOAD3_32(uint32_t, v4u32, v4i32, u32) +MSA_INTERLEAVED_IMPL_LOAD3_32(int32_t, v4i32, v4i32, s32) +MSA_INTERLEAVED_IMPL_LOAD3_32(float, v4f32, v4i32, f32) + +#define MSA_INTERLEAVED_IMPL_LOAD3_64(_Tp, _Tpv, suffix) \ +__extension__ extern __inline void \ +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \ +msa_ld3q_##suffix(const _Tp* ptr, _Tpv* a, _Tpv* b, _Tpv* c) \ +{ \ + *((_Tp*)a) = *ptr; *((_Tp*)b) = *(ptr + 1); *((_Tp*)c) = *(ptr + 2); \ + *((_Tp*)a + 1) = *(ptr + 3); *((_Tp*)b + 1) = *(ptr + 4); *((_Tp*)c + 1) = *(ptr + 5); \ +} + +MSA_INTERLEAVED_IMPL_LOAD3_64(uint64_t, v2u64, u64) +MSA_INTERLEAVED_IMPL_LOAD3_64(int64_t, v2i64, s64) +MSA_INTERLEAVED_IMPL_LOAD3_64(double, v2f64, f64) + +#ifdef _MIPSEB +#define MSA_INTERLEAVED_IMPL_STORE3_8(_Tp, _Tpv, _Tpvs, suffix) \ +__extension__ extern __inline void \ +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \ +msa_st3q_##suffix(_Tp* ptr, const _Tpv a, const _Tpv b, const _Tpv c) \ +{ \ + _Tpvs v0 = __builtin_msa_vshf_b((_Tpvs)((v2i64){0x0F0E0D0C0B1F1F1F, 0x1F1E1D1C1B1A1F1F}), (_Tpvs)b, (_Tpvs)a); \ + _Tpvs v1 = __builtin_msa_vshf_b((_Tpvs)((v2i64){0x0D1C140C1B130B1A, 0x1F170F1E160E1D15}), (_Tpvs)c, (_Tpvs)v0); \ + msa_st1q_##suffix(ptr, (_Tpv)v1); \ + v0 = __builtin_msa_vshf_b((_Tpvs)((v2i64){0x0A09080706051F1F, 0x19181716151F1F1F}), (_Tpvs)b, (_Tpvs)a); \ + v1 = __builtin_msa_vshf_b((_Tpvs)((v2i64){0x1D14071C13061B12, 0x170A1F16091E1508}), (_Tpvs)c, (_Tpvs)v0); \ + msa_st1q_##suffix(ptr + 16, (_Tpv)v1); \ + v0 = __builtin_msa_vshf_b((_Tpvs)((v2i64){0x04030201001F1F1F, 0x14131211101F1F1F}), (_Tpvs)b, (_Tpvs)a); \ + v1 = __builtin_msa_vshf_b((_Tpvs)((v2i64){0x15021C14011B1300, 0x051F17041E16031D}), (_Tpvs)c, (_Tpvs)v0); \ + msa_st1q_##suffix(ptr + 32, (_Tpv)v1); \ +} +#else +#define MSA_INTERLEAVED_IMPL_STORE3_8(_Tp, _Tpv, _Tpvs, suffix) \ +__extension__ extern __inline void \ +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \ +msa_st3q_##suffix(_Tp* ptr, const _Tpv a, const _Tpv b, const _Tpv c) \ +{ \ + _Tpvs v0 = __builtin_msa_vshf_b((_Tpvs)((v2i64){0x0000050403020100, 0x0000001413121110}), (_Tpvs)b, (_Tpvs)a); \ + _Tpvs v1 = __builtin_msa_vshf_b((_Tpvs)((v2i64){0x0A02110901100800, 0x05140C04130B0312}), (_Tpvs)c, (_Tpvs)v0); \ + msa_st1q_##suffix(ptr, (_Tpv)v1); \ + v0 = __builtin_msa_vshf_b((_Tpvs)((v2i64){0x0000000A09080706, 0x00001A1918171615}), (_Tpvs)b, (_Tpvs)a); \ + v1 = __builtin_msa_vshf_b((_Tpvs)((v2i64){0x170A011609001508, 0x0D04190C03180B02}), (_Tpvs)c, (_Tpvs)v0); \ + msa_st1q_##suffix(ptr + 16, (_Tpv)v1); \ + v0 = __builtin_msa_vshf_b((_Tpvs)((v2i64){0x0000000F0E0D0C0B, 0x0000001F1E1D1C1B}), (_Tpvs)b, (_Tpvs)a); \ + v1 = __builtin_msa_vshf_b((_Tpvs)((v2i64){0x021C09011B08001A, 0x1F0C041E0B031D0A}), (_Tpvs)c, (_Tpvs)v0); \ + msa_st1q_##suffix(ptr + 32, (_Tpv)v1); \ +} +#endif + +MSA_INTERLEAVED_IMPL_STORE3_8(uint8_t, v16u8, v16i8, u8) +MSA_INTERLEAVED_IMPL_STORE3_8(int8_t, v16i8, v16i8, s8) + +#ifdef _MIPSEB +#define MSA_INTERLEAVED_IMPL_STORE3_16(_Tp, _Tpv, _Tpvs, suffix) \ +__extension__ extern __inline void \ +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \ +msa_st3q_##suffix(_Tp* ptr, const _Tpv a, const _Tpv b, const _Tpv c) \ +{ \ + _Tpvs v0 = __builtin_msa_vshf_h((_Tpvs)((v2i64){0x000700060005000F, 0x000F000E000D000F}), (_Tpvs)b, (_Tpvs)a); \ + _Tpvs v1 = __builtin_msa_vshf_h((_Tpvs)((v2i64){0x000A0006000D0009, 0x000F000B0007000E}), (_Tpvs)c, (_Tpvs)v0); \ + msa_st1q_##suffix(ptr, (_Tpv)v1); \ + v0 = __builtin_msa_vshf_h((_Tpvs)((v2i64){0x00040003000F000F, 0x000C000B000A000F}), (_Tpvs)b, (_Tpvs)a); \ + v1 = __builtin_msa_vshf_h((_Tpvs)((v2i64){0x000E000A0003000D, 0x0005000F000B0004}), (_Tpvs)c, (_Tpvs)v0); \ + msa_st1q_##suffix(ptr + 8, (_Tpv)v1); \ + v0 = __builtin_msa_vshf_h((_Tpvs)((v2i64){0x000200010000000F, 0x00090008000F000F}), (_Tpvs)b, (_Tpvs)a); \ + v1 = __builtin_msa_vshf_h((_Tpvs)((v2i64){0x0001000E00090000, 0x000B0002000F000A}), (_Tpvs)c, (_Tpvs)v0); \ + msa_st1q_##suffix(ptr + 16, (_Tpv)v1); \ +} +#else +#define MSA_INTERLEAVED_IMPL_STORE3_16(_Tp, _Tpv, _Tpvs, suffix) \ +__extension__ extern __inline void \ +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \ +msa_st3q_##suffix(_Tp* ptr, const _Tpv a, const _Tpv b, const _Tpv c) \ +{ \ + _Tpvs v0 = __builtin_msa_vshf_h((_Tpvs)((v2i64){0x0000000200010000, 0x0000000A00090008}), (_Tpvs)b, (_Tpvs)a); \ + _Tpvs v1 = __builtin_msa_vshf_h((_Tpvs)((v2i64){0x0001000800040000, 0x0006000200090005}), (_Tpvs)c, (_Tpvs)v0); \ + msa_st1q_##suffix(ptr, (_Tpv)v1); \ + v0 = __builtin_msa_vshf_h((_Tpvs)((v2i64){0x0000000500040003, 0x00000000000C000B}), (_Tpvs)b, (_Tpvs)a); \ + v1 = __builtin_msa_vshf_h((_Tpvs)((v2i64){0x000B00040000000A, 0x0002000C00050001}), (_Tpvs)c, (_Tpvs)v0); \ + msa_st1q_##suffix(ptr + 8, (_Tpv)v1); \ + v0 = __builtin_msa_vshf_h((_Tpvs)((v2i64){0x0000000000070006, 0x0000000F000E000D}), (_Tpvs)b, (_Tpvs)a); \ + v1 = __builtin_msa_vshf_h((_Tpvs)((v2i64){0x00050000000D0004, 0x000F00060001000E}), (_Tpvs)c, (_Tpvs)v0); \ + msa_st1q_##suffix(ptr + 16, (_Tpv)v1); \ +} +#endif + +MSA_INTERLEAVED_IMPL_STORE3_16(uint16_t, v8u16, v8i16, u16) +MSA_INTERLEAVED_IMPL_STORE3_16(int16_t, v8i16, v8i16, s16) + +#ifdef _MIPSEB +#define MSA_INTERLEAVED_IMPL_STORE3_32(_Tp, _Tpv, _Tpvs, suffix) \ +__extension__ extern __inline void \ +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \ +msa_st3q_##suffix(_Tp* ptr, const _Tpv a, const _Tpv b, const _Tpv c) \ +{ \ + _Tpvs v0 = __builtin_msa_vshf_w((_Tpvs)((v2i64){0x0000000300000007, 0x0000000700000006}), (_Tpvs)b, (_Tpvs)a); \ + _Tpvs v1 = __builtin_msa_vshf_w((_Tpvs)((v2i64){0x0000000300000006, 0x0000000700000005}), (_Tpvs)c, (_Tpvs)v0); \ + msa_st1q_##suffix(ptr, (_Tpv)v1); \ + v0 = __builtin_msa_vshf_w((_Tpvs)((v2i64){0x0000000200000001, 0x0000000500000007}), (_Tpvs)b, (_Tpvs)a); \ + v1 = __builtin_msa_vshf_w((_Tpvs)((v2i64){0x0000000700000004, 0x0000000500000002}), (_Tpvs)c, (_Tpvs)v0); \ + msa_st1q_##suffix(ptr + 4, (_Tpv)v1); \ + v0 = __builtin_msa_vshf_w((_Tpvs)((v2i64){0x0000000000000007, 0x0000000400000007}), (_Tpvs)b, (_Tpvs)a); \ + v1 = __builtin_msa_vshf_w((_Tpvs)((v2i64){0x0000000500000000, 0x0000000100000007}), (_Tpvs)c, (_Tpvs)v0); \ + msa_st1q_##suffix(ptr + 8, (_Tpv)v1); \ +} +#else +#define MSA_INTERLEAVED_IMPL_STORE3_32(_Tp, _Tpv, _Tpvs, suffix) \ +__extension__ extern __inline void \ +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \ +msa_st3q_##suffix(_Tp* ptr, const _Tpv a, const _Tpv b, const _Tpv c) \ +{ \ + _Tpvs v0 = __builtin_msa_vshf_w((_Tpvs)((v2i64){0x0000000100000000, 0x0000000000000004}), (_Tpvs)b, (_Tpvs)a); \ + _Tpvs v1 = __builtin_msa_vshf_w((_Tpvs)((v2i64){0x0000000200000000, 0x0000000100000004}), (_Tpvs)c, (_Tpvs)v0); \ + msa_st1q_##suffix(ptr, (_Tpv)v1); \ + v0 = __builtin_msa_vshf_w((_Tpvs)((v2i64){0x0000000000000002, 0x0000000600000005}), (_Tpvs)b, (_Tpvs)a); \ + v1 = __builtin_msa_vshf_w((_Tpvs)((v2i64){0x0000000500000002, 0x0000000300000000}), (_Tpvs)c, (_Tpvs)v0); \ + msa_st1q_##suffix(ptr + 4, (_Tpv)v1); \ + v0 = __builtin_msa_vshf_w((_Tpvs)((v2i64){0x0000000000000003, 0x0000000000000007}), (_Tpvs)b, (_Tpvs)a); \ + v1 = __builtin_msa_vshf_w((_Tpvs)((v2i64){0x0000000000000006, 0x0000000700000002}), (_Tpvs)c, (_Tpvs)v0); \ + msa_st1q_##suffix(ptr + 8, (_Tpv)v1); \ +} +#endif + +MSA_INTERLEAVED_IMPL_STORE3_32(uint32_t, v4u32, v4i32, u32) +MSA_INTERLEAVED_IMPL_STORE3_32(int32_t, v4i32, v4i32, s32) +MSA_INTERLEAVED_IMPL_STORE3_32(float, v4f32, v4i32, f32) + +#define MSA_INTERLEAVED_IMPL_STORE3_64(_Tp, _Tpv, suffix) \ +__extension__ extern __inline void \ +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \ +msa_st3q_##suffix(_Tp* ptr, const _Tpv a, const _Tpv b, const _Tpv c) \ +{ \ + *ptr = a[0]; *(ptr + 1) = b[0]; *(ptr + 2) = c[0]; \ + *(ptr + 3) = a[1]; *(ptr + 4) = b[1]; *(ptr + 5) = c[1]; \ +} + +MSA_INTERLEAVED_IMPL_STORE3_64(uint64_t, v2u64, u64) +MSA_INTERLEAVED_IMPL_STORE3_64(int64_t, v2i64, s64) +MSA_INTERLEAVED_IMPL_STORE3_64(double, v2f64, f64) + +#define MSA_INTERLEAVED_IMPL_LOAD4_STORE4(_Tp, _Tpv, _Tpvs, suffix, df, nlanes) \ +__extension__ extern __inline void \ +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \ +msa_ld4q_##suffix(const _Tp* ptr, _Tpv* a, _Tpv* b, _Tpv* c, _Tpv* d) \ +{ \ + _Tpv v0 = msa_ld1q_##suffix(ptr); \ + _Tpv v1 = msa_ld1q_##suffix(ptr + nlanes); \ + _Tpv v2 = msa_ld1q_##suffix(ptr + nlanes * 2); \ + _Tpv v3 = msa_ld1q_##suffix(ptr + nlanes * 3); \ + _Tpvs t0 = __builtin_msa_pckev_##df((_Tpvs)v1, (_Tpvs)v0); \ + _Tpvs t1 = __builtin_msa_pckev_##df((_Tpvs)v3, (_Tpvs)v2); \ + _Tpvs t2 = __builtin_msa_pckod_##df((_Tpvs)v1, (_Tpvs)v0); \ + _Tpvs t3 = __builtin_msa_pckod_##df((_Tpvs)v3, (_Tpvs)v2); \ + *a = (_Tpv)__builtin_msa_pckev_##df(t1, t0); \ + *b = (_Tpv)__builtin_msa_pckev_##df(t3, t2); \ + *c = (_Tpv)__builtin_msa_pckod_##df(t1, t0); \ + *d = (_Tpv)__builtin_msa_pckod_##df(t3, t2); \ +} \ +__extension__ extern __inline void \ +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \ +msa_st4q_##suffix(_Tp* ptr, const _Tpv a, const _Tpv b, const _Tpv c, const _Tpv d) \ +{ \ + _Tpvs v0 = __builtin_msa_ilvr_##df((_Tpvs)c, (_Tpvs)a); \ + _Tpvs v1 = __builtin_msa_ilvr_##df((_Tpvs)d, (_Tpvs)b); \ + _Tpvs v2 = __builtin_msa_ilvl_##df((_Tpvs)c, (_Tpvs)a); \ + _Tpvs v3 = __builtin_msa_ilvl_##df((_Tpvs)d, (_Tpvs)b); \ + msa_st1q_##suffix(ptr, (_Tpv)__builtin_msa_ilvr_##df(v1, v0)); \ + msa_st1q_##suffix(ptr + nlanes, (_Tpv)__builtin_msa_ilvl_##df(v1, v0)); \ + msa_st1q_##suffix(ptr + 2 * nlanes, (_Tpv)__builtin_msa_ilvr_##df(v3, v2)); \ + msa_st1q_##suffix(ptr + 3 * nlanes, (_Tpv)__builtin_msa_ilvl_##df(v3, v2)); \ +} + +MSA_INTERLEAVED_IMPL_LOAD4_STORE4(uint8_t, v16u8, v16i8, u8, b, 16) +MSA_INTERLEAVED_IMPL_LOAD4_STORE4(int8_t, v16i8, v16i8, s8, b, 16) +MSA_INTERLEAVED_IMPL_LOAD4_STORE4(uint16_t, v8u16, v8i16, u16, h, 8) +MSA_INTERLEAVED_IMPL_LOAD4_STORE4(int16_t, v8i16, v8i16, s16, h, 8) +MSA_INTERLEAVED_IMPL_LOAD4_STORE4(uint32_t, v4u32, v4i32, u32, w, 4) +MSA_INTERLEAVED_IMPL_LOAD4_STORE4(int32_t, v4i32, v4i32, s32, w, 4) +MSA_INTERLEAVED_IMPL_LOAD4_STORE4(float, v4f32, v4i32, f32, w, 4) + +#define MSA_INTERLEAVED_IMPL_LOAD4_STORE4_64(_Tp, _Tpv, _Tpvs, suffix) \ +__extension__ extern __inline void \ +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \ +msa_ld4q_##suffix(const _Tp* ptr, _Tpv* a, _Tpv* b, _Tpv* c, _Tpv* d) \ +{ \ + _Tpv v0 = msa_ld1q_##suffix(ptr); \ + _Tpv v1 = msa_ld1q_##suffix(ptr + 2); \ + _Tpv v2 = msa_ld1q_##suffix(ptr + 4); \ + _Tpv v3 = msa_ld1q_##suffix(ptr + 6); \ + *a = (_Tpv)__builtin_msa_ilvr_d((_Tpvs)v2, (_Tpvs)v0); \ + *b = (_Tpv)__builtin_msa_ilvl_d((_Tpvs)v2, (_Tpvs)v0); \ + *c = (_Tpv)__builtin_msa_ilvr_d((_Tpvs)v3, (_Tpvs)v1); \ + *d = (_Tpv)__builtin_msa_ilvl_d((_Tpvs)v3, (_Tpvs)v1); \ +} \ +__extension__ extern __inline void \ +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \ +msa_st4q_##suffix(_Tp* ptr, const _Tpv a, const _Tpv b, const _Tpv c, const _Tpv d) \ +{ \ + msa_st1q_##suffix(ptr, (_Tpv)__builtin_msa_ilvr_d((_Tpvs)b, (_Tpvs)a)); \ + msa_st1q_##suffix(ptr + 2, (_Tpv)__builtin_msa_ilvr_d((_Tpvs)d, (_Tpvs)c)); \ + msa_st1q_##suffix(ptr + 4, (_Tpv)__builtin_msa_ilvl_d((_Tpvs)b, (_Tpvs)a)); \ + msa_st1q_##suffix(ptr + 6, (_Tpv)__builtin_msa_ilvl_d((_Tpvs)d, (_Tpvs)c)); \ +} + +MSA_INTERLEAVED_IMPL_LOAD4_STORE4_64(uint64_t, v2u64, v2i64, u64) +MSA_INTERLEAVED_IMPL_LOAD4_STORE4_64(int64_t, v2i64, v2i64, s64) +MSA_INTERLEAVED_IMPL_LOAD4_STORE4_64(double, v2f64, v2i64, f64) + +__extension__ extern __inline v8i16 +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +msa_qdmulhq_n_s16(v8i16 a, int16_t b) +{ + v8i16 a_lo, a_hi; + ILVRL_H2_SH(a, msa_dupq_n_s16(0), a_lo, a_hi); + return msa_packr_s32(msa_shlq_n_s32(msa_mulq_s32(msa_paddlq_s16(a_lo), msa_dupq_n_s32(b)), 1), + msa_shlq_n_s32(msa_mulq_s32(msa_paddlq_s16(a_hi), msa_dupq_n_s32(b)), 1), 16); +} + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif /*__mips_msa*/ +#endif /* OPENCV_CORE_MSA_MACROS_H */ diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/hal/simd_utils.impl.hpp b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/hal/simd_utils.impl.hpp new file mode 100644 index 0000000..fff8f94 --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/hal/simd_utils.impl.hpp @@ -0,0 +1,146 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html + +// This header is not standalone. Don't include directly, use "intrin.hpp" instead. +#ifdef OPENCV_HAL_INTRIN_HPP // defined in intrin.hpp + + +#if CV_SIMD128 || CV_SIMD128_CPP + +template struct Type2Vec128_Traits; +#define CV_INTRIN_DEF_TYPE2VEC128_TRAITS(type_, vec_type_) \ + template<> struct Type2Vec128_Traits \ + { \ + typedef vec_type_ vec_type; \ + } + +CV_INTRIN_DEF_TYPE2VEC128_TRAITS(uchar, v_uint8x16); +CV_INTRIN_DEF_TYPE2VEC128_TRAITS(schar, v_int8x16); +CV_INTRIN_DEF_TYPE2VEC128_TRAITS(ushort, v_uint16x8); +CV_INTRIN_DEF_TYPE2VEC128_TRAITS(short, v_int16x8); +CV_INTRIN_DEF_TYPE2VEC128_TRAITS(unsigned, v_uint32x4); +CV_INTRIN_DEF_TYPE2VEC128_TRAITS(int, v_int32x4); +CV_INTRIN_DEF_TYPE2VEC128_TRAITS(float, v_float32x4); +CV_INTRIN_DEF_TYPE2VEC128_TRAITS(uint64, v_uint64x2); +CV_INTRIN_DEF_TYPE2VEC128_TRAITS(int64, v_int64x2); +#if CV_SIMD128_64F +CV_INTRIN_DEF_TYPE2VEC128_TRAITS(double, v_float64x2); +#endif + +template static inline +typename Type2Vec128_Traits<_T>::vec_type v_setall(const _T& a); + +template<> inline Type2Vec128_Traits< uchar>::vec_type v_setall< uchar>(const uchar& a) { return v_setall_u8(a); } +template<> inline Type2Vec128_Traits< schar>::vec_type v_setall< schar>(const schar& a) { return v_setall_s8(a); } +template<> inline Type2Vec128_Traits::vec_type v_setall(const ushort& a) { return v_setall_u16(a); } +template<> inline Type2Vec128_Traits< short>::vec_type v_setall< short>(const short& a) { return v_setall_s16(a); } +template<> inline Type2Vec128_Traits< uint>::vec_type v_setall< uint>(const uint& a) { return v_setall_u32(a); } +template<> inline Type2Vec128_Traits< int>::vec_type v_setall< int>(const int& a) { return v_setall_s32(a); } +template<> inline Type2Vec128_Traits::vec_type v_setall(const uint64& a) { return v_setall_u64(a); } +template<> inline Type2Vec128_Traits< int64>::vec_type v_setall< int64>(const int64& a) { return v_setall_s64(a); } +template<> inline Type2Vec128_Traits< float>::vec_type v_setall< float>(const float& a) { return v_setall_f32(a); } +#if CV_SIMD128_64F +template<> inline Type2Vec128_Traits::vec_type v_setall(const double& a) { return v_setall_f64(a); } +#endif + +#endif // SIMD128 + + +#if CV_SIMD256 + +template struct Type2Vec256_Traits; +#define CV_INTRIN_DEF_TYPE2VEC256_TRAITS(type_, vec_type_) \ + template<> struct Type2Vec256_Traits \ + { \ + typedef vec_type_ vec_type; \ + } + +CV_INTRIN_DEF_TYPE2VEC256_TRAITS(uchar, v_uint8x32); +CV_INTRIN_DEF_TYPE2VEC256_TRAITS(schar, v_int8x32); +CV_INTRIN_DEF_TYPE2VEC256_TRAITS(ushort, v_uint16x16); +CV_INTRIN_DEF_TYPE2VEC256_TRAITS(short, v_int16x16); +CV_INTRIN_DEF_TYPE2VEC256_TRAITS(unsigned, v_uint32x8); +CV_INTRIN_DEF_TYPE2VEC256_TRAITS(int, v_int32x8); +CV_INTRIN_DEF_TYPE2VEC256_TRAITS(float, v_float32x8); +CV_INTRIN_DEF_TYPE2VEC256_TRAITS(uint64, v_uint64x4); +CV_INTRIN_DEF_TYPE2VEC256_TRAITS(int64, v_int64x4); +#if CV_SIMD256_64F +CV_INTRIN_DEF_TYPE2VEC256_TRAITS(double, v_float64x4); +#endif + +template static inline +typename Type2Vec256_Traits<_T>::vec_type v256_setall(const _T& a); + +template<> inline Type2Vec256_Traits< uchar>::vec_type v256_setall< uchar>(const uchar& a) { return v256_setall_u8(a); } +template<> inline Type2Vec256_Traits< schar>::vec_type v256_setall< schar>(const schar& a) { return v256_setall_s8(a); } +template<> inline Type2Vec256_Traits::vec_type v256_setall(const ushort& a) { return v256_setall_u16(a); } +template<> inline Type2Vec256_Traits< short>::vec_type v256_setall< short>(const short& a) { return v256_setall_s16(a); } +template<> inline Type2Vec256_Traits< uint>::vec_type v256_setall< uint>(const uint& a) { return v256_setall_u32(a); } +template<> inline Type2Vec256_Traits< int>::vec_type v256_setall< int>(const int& a) { return v256_setall_s32(a); } +template<> inline Type2Vec256_Traits::vec_type v256_setall(const uint64& a) { return v256_setall_u64(a); } +template<> inline Type2Vec256_Traits< int64>::vec_type v256_setall< int64>(const int64& a) { return v256_setall_s64(a); } +template<> inline Type2Vec256_Traits< float>::vec_type v256_setall< float>(const float& a) { return v256_setall_f32(a); } +#if CV_SIMD256_64F +template<> inline Type2Vec256_Traits::vec_type v256_setall(const double& a) { return v256_setall_f64(a); } +#endif + +#endif // SIMD256 + + +#if CV_SIMD512 + +template struct Type2Vec512_Traits; +#define CV_INTRIN_DEF_TYPE2VEC512_TRAITS(type_, vec_type_) \ + template<> struct Type2Vec512_Traits \ + { \ + typedef vec_type_ vec_type; \ + } + +CV_INTRIN_DEF_TYPE2VEC512_TRAITS(uchar, v_uint8x64); +CV_INTRIN_DEF_TYPE2VEC512_TRAITS(schar, v_int8x64); +CV_INTRIN_DEF_TYPE2VEC512_TRAITS(ushort, v_uint16x32); +CV_INTRIN_DEF_TYPE2VEC512_TRAITS(short, v_int16x32); +CV_INTRIN_DEF_TYPE2VEC512_TRAITS(unsigned, v_uint32x16); +CV_INTRIN_DEF_TYPE2VEC512_TRAITS(int, v_int32x16); +CV_INTRIN_DEF_TYPE2VEC512_TRAITS(float, v_float32x16); +CV_INTRIN_DEF_TYPE2VEC512_TRAITS(uint64, v_uint64x8); +CV_INTRIN_DEF_TYPE2VEC512_TRAITS(int64, v_int64x8); +#if CV_SIMD512_64F +CV_INTRIN_DEF_TYPE2VEC512_TRAITS(double, v_float64x8); +#endif + +template static inline +typename Type2Vec512_Traits<_T>::vec_type v512_setall(const _T& a); + +template<> inline Type2Vec512_Traits< uchar>::vec_type v512_setall< uchar>(const uchar& a) { return v512_setall_u8(a); } +template<> inline Type2Vec512_Traits< schar>::vec_type v512_setall< schar>(const schar& a) { return v512_setall_s8(a); } +template<> inline Type2Vec512_Traits::vec_type v512_setall(const ushort& a) { return v512_setall_u16(a); } +template<> inline Type2Vec512_Traits< short>::vec_type v512_setall< short>(const short& a) { return v512_setall_s16(a); } +template<> inline Type2Vec512_Traits< uint>::vec_type v512_setall< uint>(const uint& a) { return v512_setall_u32(a); } +template<> inline Type2Vec512_Traits< int>::vec_type v512_setall< int>(const int& a) { return v512_setall_s32(a); } +template<> inline Type2Vec512_Traits::vec_type v512_setall(const uint64& a) { return v512_setall_u64(a); } +template<> inline Type2Vec512_Traits< int64>::vec_type v512_setall< int64>(const int64& a) { return v512_setall_s64(a); } +template<> inline Type2Vec512_Traits< float>::vec_type v512_setall< float>(const float& a) { return v512_setall_f32(a); } +#if CV_SIMD512_64F +template<> inline Type2Vec512_Traits::vec_type v512_setall(const double& a) { return v512_setall_f64(a); } +#endif + +#endif // SIMD512 + + +#if CV_SIMD_WIDTH == 16 +template static inline +typename Type2Vec128_Traits<_T>::vec_type vx_setall(const _T& a) { return v_setall(a); } +#elif CV_SIMD_WIDTH == 32 +template static inline +typename Type2Vec256_Traits<_T>::vec_type vx_setall(const _T& a) { return v256_setall(a); } +#elif CV_SIMD_WIDTH == 64 +template static inline +typename Type2Vec512_Traits<_T>::vec_type vx_setall(const _T& a) { return v512_setall(a); } +#else +#error "Build configuration error, unsupported CV_SIMD_WIDTH" +#endif + + +#endif // OPENCV_HAL_INTRIN_HPP diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/mat.hpp b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/mat.hpp new file mode 100644 index 0000000..c6375f6 --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/mat.hpp @@ -0,0 +1,3725 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Copyright (C) 2013, OpenCV Foundation, all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef OPENCV_CORE_MAT_HPP +#define OPENCV_CORE_MAT_HPP + +#ifndef __cplusplus +# error mat.hpp header must be compiled as C++ +#endif + +#include "opencv2/core/matx.hpp" +#include "opencv2/core/types.hpp" + +#include "opencv2/core/bufferpool.hpp" + +#include + +namespace cv +{ + +//! @addtogroup core_basic +//! @{ + +enum AccessFlag { ACCESS_READ=1<<24, ACCESS_WRITE=1<<25, + ACCESS_RW=3<<24, ACCESS_MASK=ACCESS_RW, ACCESS_FAST=1<<26 }; +CV_ENUM_FLAGS(AccessFlag) +__CV_ENUM_FLAGS_BITWISE_AND(AccessFlag, int, AccessFlag) + +CV__DEBUG_NS_BEGIN + +class CV_EXPORTS _OutputArray; + +//////////////////////// Input/Output Array Arguments ///////////////////////////////// + +/** @brief This is the proxy class for passing read-only input arrays into OpenCV functions. + +It is defined as: +@code + typedef const _InputArray& InputArray; +@endcode +where _InputArray is a class that can be constructed from `Mat`, `Mat_`, `Matx`, +`std::vector`, `std::vector >`, `std::vector`, `std::vector >`, +`UMat`, `std::vector` or `double`. It can also be constructed from a matrix expression. + +Since this is mostly implementation-level class, and its interface may change in future versions, we +do not describe it in details. There are a few key things, though, that should be kept in mind: + +- When you see in the reference manual or in OpenCV source code a function that takes + InputArray, it means that you can actually pass `Mat`, `Matx`, `vector` etc. (see above the + complete list). +- Optional input arguments: If some of the input arrays may be empty, pass cv::noArray() (or + simply cv::Mat() as you probably did before). +- The class is designed solely for passing parameters. That is, normally you *should not* + declare class members, local and global variables of this type. +- If you want to design your own function or a class method that can operate of arrays of + multiple types, you can use InputArray (or OutputArray) for the respective parameters. Inside + a function you should use _InputArray::getMat() method to construct a matrix header for the + array (without copying data). _InputArray::kind() can be used to distinguish Mat from + `vector<>` etc., but normally it is not needed. + +Here is how you can use a function that takes InputArray : +@code + std::vector vec; + // points or a circle + for( int i = 0; i < 30; i++ ) + vec.push_back(Point2f((float)(100 + 30*cos(i*CV_PI*2/5)), + (float)(100 - 30*sin(i*CV_PI*2/5)))); + cv::transform(vec, vec, cv::Matx23f(0.707, -0.707, 10, 0.707, 0.707, 20)); +@endcode +That is, we form an STL vector containing points, and apply in-place affine transformation to the +vector using the 2x3 matrix created inline as `Matx` instance. + +Here is how such a function can be implemented (for simplicity, we implement a very specific case of +it, according to the assertion statement inside) : +@code + void myAffineTransform(InputArray _src, OutputArray _dst, InputArray _m) + { + // get Mat headers for input arrays. This is O(1) operation, + // unless _src and/or _m are matrix expressions. + Mat src = _src.getMat(), m = _m.getMat(); + CV_Assert( src.type() == CV_32FC2 && m.type() == CV_32F && m.size() == Size(3, 2) ); + + // [re]create the output array so that it has the proper size and type. + // In case of Mat it calls Mat::create, in case of STL vector it calls vector::resize. + _dst.create(src.size(), src.type()); + Mat dst = _dst.getMat(); + + for( int i = 0; i < src.rows; i++ ) + for( int j = 0; j < src.cols; j++ ) + { + Point2f pt = src.at(i, j); + dst.at(i, j) = Point2f(m.at(0, 0)*pt.x + + m.at(0, 1)*pt.y + + m.at(0, 2), + m.at(1, 0)*pt.x + + m.at(1, 1)*pt.y + + m.at(1, 2)); + } + } +@endcode +There is another related type, InputArrayOfArrays, which is currently defined as a synonym for +InputArray: +@code + typedef InputArray InputArrayOfArrays; +@endcode +It denotes function arguments that are either vectors of vectors or vectors of matrices. A separate +synonym is needed to generate Python/Java etc. wrappers properly. At the function implementation +level their use is similar, but _InputArray::getMat(idx) should be used to get header for the +idx-th component of the outer vector and _InputArray::size().area() should be used to find the +number of components (vectors/matrices) of the outer vector. + +In general, type support is limited to cv::Mat types. Other types are forbidden. +But in some cases we need to support passing of custom non-general Mat types, like arrays of cv::KeyPoint, cv::DMatch, etc. +This data is not intended to be interpreted as an image data, or processed somehow like regular cv::Mat. +To pass such custom type use rawIn() / rawOut() / rawInOut() wrappers. +Custom type is wrapped as Mat-compatible `CV_8UC` values (N = sizeof(T), N <= CV_CN_MAX). + */ +class CV_EXPORTS _InputArray +{ +public: + enum KindFlag { + KIND_SHIFT = 16, + FIXED_TYPE = 0x8000 << KIND_SHIFT, + FIXED_SIZE = 0x4000 << KIND_SHIFT, + KIND_MASK = 31 << KIND_SHIFT, + + NONE = 0 << KIND_SHIFT, + MAT = 1 << KIND_SHIFT, + MATX = 2 << KIND_SHIFT, + STD_VECTOR = 3 << KIND_SHIFT, + STD_VECTOR_VECTOR = 4 << KIND_SHIFT, + STD_VECTOR_MAT = 5 << KIND_SHIFT, + EXPR = 6 << KIND_SHIFT, //!< removed + OPENGL_BUFFER = 7 << KIND_SHIFT, + CUDA_HOST_MEM = 8 << KIND_SHIFT, + CUDA_GPU_MAT = 9 << KIND_SHIFT, + UMAT =10 << KIND_SHIFT, + STD_VECTOR_UMAT =11 << KIND_SHIFT, + STD_BOOL_VECTOR =12 << KIND_SHIFT, + STD_VECTOR_CUDA_GPU_MAT = 13 << KIND_SHIFT, + STD_ARRAY =14 << KIND_SHIFT, + STD_ARRAY_MAT =15 << KIND_SHIFT + }; + + _InputArray(); + _InputArray(int _flags, void* _obj); + _InputArray(const Mat& m); + _InputArray(const MatExpr& expr); + _InputArray(const std::vector& vec); + template _InputArray(const Mat_<_Tp>& m); + template _InputArray(const std::vector<_Tp>& vec); + _InputArray(const std::vector& vec); + template _InputArray(const std::vector >& vec); + _InputArray(const std::vector >&) = delete; // not supported + template _InputArray(const std::vector >& vec); + template _InputArray(const _Tp* vec, int n); + template _InputArray(const Matx<_Tp, m, n>& matx); + _InputArray(const double& val); + _InputArray(const cuda::GpuMat& d_mat); + _InputArray(const std::vector& d_mat_array); + _InputArray(const ogl::Buffer& buf); + _InputArray(const cuda::HostMem& cuda_mem); + template _InputArray(const cudev::GpuMat_<_Tp>& m); + _InputArray(const UMat& um); + _InputArray(const std::vector& umv); + + template _InputArray(const std::array<_Tp, _Nm>& arr); + template _InputArray(const std::array& arr); + + template static _InputArray rawIn(const std::vector<_Tp>& vec); + template static _InputArray rawIn(const std::array<_Tp, _Nm>& arr); + + Mat getMat(int idx=-1) const; + Mat getMat_(int idx=-1) const; + UMat getUMat(int idx=-1) const; + void getMatVector(std::vector& mv) const; + void getUMatVector(std::vector& umv) const; + void getGpuMatVector(std::vector& gpumv) const; + cuda::GpuMat getGpuMat() const; + ogl::Buffer getOGlBuffer() const; + + int getFlags() const; + void* getObj() const; + Size getSz() const; + + _InputArray::KindFlag kind() const; + int dims(int i=-1) const; + int cols(int i=-1) const; + int rows(int i=-1) const; + Size size(int i=-1) const; + int sizend(int* sz, int i=-1) const; + bool sameSize(const _InputArray& arr) const; + size_t total(int i=-1) const; + int type(int i=-1) const; + int depth(int i=-1) const; + int channels(int i=-1) const; + bool isContinuous(int i=-1) const; + bool isSubmatrix(int i=-1) const; + bool empty() const; + void copyTo(const _OutputArray& arr) const; + void copyTo(const _OutputArray& arr, const _InputArray & mask) const; + size_t offset(int i=-1) const; + size_t step(int i=-1) const; + bool isMat() const; + bool isUMat() const; + bool isMatVector() const; + bool isUMatVector() const; + bool isMatx() const; + bool isVector() const; + bool isGpuMat() const; + bool isGpuMatVector() const; + ~_InputArray(); + +protected: + int flags; + void* obj; + Size sz; + + void init(int _flags, const void* _obj); + void init(int _flags, const void* _obj, Size _sz); +}; +CV_ENUM_FLAGS(_InputArray::KindFlag) +__CV_ENUM_FLAGS_BITWISE_AND(_InputArray::KindFlag, int, _InputArray::KindFlag) + +/** @brief This type is very similar to InputArray except that it is used for input/output and output function +parameters. + +Just like with InputArray, OpenCV users should not care about OutputArray, they just pass `Mat`, +`vector` etc. to the functions. The same limitation as for `InputArray`: *Do not explicitly +create OutputArray instances* applies here too. + +If you want to make your function polymorphic (i.e. accept different arrays as output parameters), +it is also not very difficult. Take the sample above as the reference. Note that +_OutputArray::create() needs to be called before _OutputArray::getMat(). This way you guarantee +that the output array is properly allocated. + +Optional output parameters. If you do not need certain output array to be computed and returned to +you, pass cv::noArray(), just like you would in the case of optional input array. At the +implementation level, use _OutputArray::needed() to check if certain output array needs to be +computed or not. + +There are several synonyms for OutputArray that are used to assist automatic Python/Java/... wrapper +generators: +@code + typedef OutputArray OutputArrayOfArrays; + typedef OutputArray InputOutputArray; + typedef OutputArray InputOutputArrayOfArrays; +@endcode + */ +class CV_EXPORTS _OutputArray : public _InputArray +{ +public: + enum DepthMask + { + DEPTH_MASK_8U = 1 << CV_8U, + DEPTH_MASK_8S = 1 << CV_8S, + DEPTH_MASK_16U = 1 << CV_16U, + DEPTH_MASK_16S = 1 << CV_16S, + DEPTH_MASK_32S = 1 << CV_32S, + DEPTH_MASK_32F = 1 << CV_32F, + DEPTH_MASK_64F = 1 << CV_64F, + DEPTH_MASK_16F = 1 << CV_16F, + DEPTH_MASK_ALL = (DEPTH_MASK_64F<<1)-1, + DEPTH_MASK_ALL_BUT_8S = DEPTH_MASK_ALL & ~DEPTH_MASK_8S, + DEPTH_MASK_ALL_16F = (DEPTH_MASK_16F<<1)-1, + DEPTH_MASK_FLT = DEPTH_MASK_32F + DEPTH_MASK_64F + }; + + _OutputArray(); + _OutputArray(int _flags, void* _obj); + _OutputArray(Mat& m); + _OutputArray(std::vector& vec); + _OutputArray(cuda::GpuMat& d_mat); + _OutputArray(std::vector& d_mat); + _OutputArray(ogl::Buffer& buf); + _OutputArray(cuda::HostMem& cuda_mem); + template _OutputArray(cudev::GpuMat_<_Tp>& m); + template _OutputArray(std::vector<_Tp>& vec); + _OutputArray(std::vector& vec) = delete; // not supported + template _OutputArray(std::vector >& vec); + _OutputArray(std::vector >&) = delete; // not supported + template _OutputArray(std::vector >& vec); + template _OutputArray(Mat_<_Tp>& m); + template _OutputArray(_Tp* vec, int n); + template _OutputArray(Matx<_Tp, m, n>& matx); + _OutputArray(UMat& m); + _OutputArray(std::vector& vec); + + _OutputArray(const Mat& m); + _OutputArray(const std::vector& vec); + _OutputArray(const cuda::GpuMat& d_mat); + _OutputArray(const std::vector& d_mat); + _OutputArray(const ogl::Buffer& buf); + _OutputArray(const cuda::HostMem& cuda_mem); + template _OutputArray(const cudev::GpuMat_<_Tp>& m); + template _OutputArray(const std::vector<_Tp>& vec); + template _OutputArray(const std::vector >& vec); + template _OutputArray(const std::vector >& vec); + template _OutputArray(const Mat_<_Tp>& m); + template _OutputArray(const _Tp* vec, int n); + template _OutputArray(const Matx<_Tp, m, n>& matx); + _OutputArray(const UMat& m); + _OutputArray(const std::vector& vec); + + template _OutputArray(std::array<_Tp, _Nm>& arr); + template _OutputArray(const std::array<_Tp, _Nm>& arr); + template _OutputArray(std::array& arr); + template _OutputArray(const std::array& arr); + + template static _OutputArray rawOut(std::vector<_Tp>& vec); + template static _OutputArray rawOut(std::array<_Tp, _Nm>& arr); + + bool fixedSize() const; + bool fixedType() const; + bool needed() const; + Mat& getMatRef(int i=-1) const; + UMat& getUMatRef(int i=-1) const; + cuda::GpuMat& getGpuMatRef() const; + std::vector& getGpuMatVecRef() const; + ogl::Buffer& getOGlBufferRef() const; + cuda::HostMem& getHostMemRef() const; + void create(Size sz, int type, int i=-1, bool allowTransposed=false, _OutputArray::DepthMask fixedDepthMask=static_cast<_OutputArray::DepthMask>(0)) const; + void create(int rows, int cols, int type, int i=-1, bool allowTransposed=false, _OutputArray::DepthMask fixedDepthMask=static_cast<_OutputArray::DepthMask>(0)) const; + void create(int dims, const int* size, int type, int i=-1, bool allowTransposed=false, _OutputArray::DepthMask fixedDepthMask=static_cast<_OutputArray::DepthMask>(0)) const; + void createSameSize(const _InputArray& arr, int mtype) const; + void release() const; + void clear() const; + void setTo(const _InputArray& value, const _InputArray & mask = _InputArray()) const; + + void assign(const UMat& u) const; + void assign(const Mat& m) const; + + void assign(const std::vector& v) const; + void assign(const std::vector& v) const; + + void move(UMat& u) const; + void move(Mat& m) const; +}; + + +class CV_EXPORTS _InputOutputArray : public _OutputArray +{ +public: + _InputOutputArray(); + _InputOutputArray(int _flags, void* _obj); + _InputOutputArray(Mat& m); + _InputOutputArray(std::vector& vec); + _InputOutputArray(cuda::GpuMat& d_mat); + _InputOutputArray(ogl::Buffer& buf); + _InputOutputArray(cuda::HostMem& cuda_mem); + template _InputOutputArray(cudev::GpuMat_<_Tp>& m); + template _InputOutputArray(std::vector<_Tp>& vec); + _InputOutputArray(std::vector& vec) = delete; // not supported + template _InputOutputArray(std::vector >& vec); + template _InputOutputArray(std::vector >& vec); + template _InputOutputArray(Mat_<_Tp>& m); + template _InputOutputArray(_Tp* vec, int n); + template _InputOutputArray(Matx<_Tp, m, n>& matx); + _InputOutputArray(UMat& m); + _InputOutputArray(std::vector& vec); + + _InputOutputArray(const Mat& m); + _InputOutputArray(const std::vector& vec); + _InputOutputArray(const cuda::GpuMat& d_mat); + _InputOutputArray(const std::vector& d_mat); + _InputOutputArray(const ogl::Buffer& buf); + _InputOutputArray(const cuda::HostMem& cuda_mem); + template _InputOutputArray(const cudev::GpuMat_<_Tp>& m); + template _InputOutputArray(const std::vector<_Tp>& vec); + template _InputOutputArray(const std::vector >& vec); + template _InputOutputArray(const std::vector >& vec); + template _InputOutputArray(const Mat_<_Tp>& m); + template _InputOutputArray(const _Tp* vec, int n); + template _InputOutputArray(const Matx<_Tp, m, n>& matx); + _InputOutputArray(const UMat& m); + _InputOutputArray(const std::vector& vec); + + template _InputOutputArray(std::array<_Tp, _Nm>& arr); + template _InputOutputArray(const std::array<_Tp, _Nm>& arr); + template _InputOutputArray(std::array& arr); + template _InputOutputArray(const std::array& arr); + + template static _InputOutputArray rawInOut(std::vector<_Tp>& vec); + template _InputOutputArray rawInOut(std::array<_Tp, _Nm>& arr); + +}; + +/** Helper to wrap custom types. @see InputArray */ +template static inline _InputArray rawIn(_Tp& v); +/** Helper to wrap custom types. @see InputArray */ +template static inline _OutputArray rawOut(_Tp& v); +/** Helper to wrap custom types. @see InputArray */ +template static inline _InputOutputArray rawInOut(_Tp& v); + +CV__DEBUG_NS_END + +typedef const _InputArray& InputArray; +typedef InputArray InputArrayOfArrays; +typedef const _OutputArray& OutputArray; +typedef OutputArray OutputArrayOfArrays; +typedef const _InputOutputArray& InputOutputArray; +typedef InputOutputArray InputOutputArrayOfArrays; + +CV_EXPORTS InputOutputArray noArray(); + +/////////////////////////////////// MatAllocator ////////////////////////////////////// + +//! Usage flags for allocator +enum UMatUsageFlags +{ + USAGE_DEFAULT = 0, + + // buffer allocation policy is platform and usage specific + USAGE_ALLOCATE_HOST_MEMORY = 1 << 0, + USAGE_ALLOCATE_DEVICE_MEMORY = 1 << 1, + USAGE_ALLOCATE_SHARED_MEMORY = 1 << 2, // It is not equal to: USAGE_ALLOCATE_HOST_MEMORY | USAGE_ALLOCATE_DEVICE_MEMORY + + __UMAT_USAGE_FLAGS_32BIT = 0x7fffffff // Binary compatibility hint +}; + +struct CV_EXPORTS UMatData; + +/** @brief Custom array allocator +*/ +class CV_EXPORTS MatAllocator +{ +public: + MatAllocator() {} + virtual ~MatAllocator() {} + + // let's comment it off for now to detect and fix all the uses of allocator + //virtual void allocate(int dims, const int* sizes, int type, int*& refcount, + // uchar*& datastart, uchar*& data, size_t* step) = 0; + //virtual void deallocate(int* refcount, uchar* datastart, uchar* data) = 0; + virtual UMatData* allocate(int dims, const int* sizes, int type, + void* data, size_t* step, AccessFlag flags, UMatUsageFlags usageFlags) const = 0; + virtual bool allocate(UMatData* data, AccessFlag accessflags, UMatUsageFlags usageFlags) const = 0; + virtual void deallocate(UMatData* data) const = 0; + virtual void map(UMatData* data, AccessFlag accessflags) const; + virtual void unmap(UMatData* data) const; + virtual void download(UMatData* data, void* dst, int dims, const size_t sz[], + const size_t srcofs[], const size_t srcstep[], + const size_t dststep[]) const; + virtual void upload(UMatData* data, const void* src, int dims, const size_t sz[], + const size_t dstofs[], const size_t dststep[], + const size_t srcstep[]) const; + virtual void copy(UMatData* srcdata, UMatData* dstdata, int dims, const size_t sz[], + const size_t srcofs[], const size_t srcstep[], + const size_t dstofs[], const size_t dststep[], bool sync) const; + + // default implementation returns DummyBufferPoolController + virtual BufferPoolController* getBufferPoolController(const char* id = NULL) const; +}; + + +//////////////////////////////// MatCommaInitializer ////////////////////////////////// + +/** @brief Comma-separated Matrix Initializer + + The class instances are usually not created explicitly. + Instead, they are created on "matrix << firstValue" operator. + + The sample below initializes 2x2 rotation matrix: + + \code + double angle = 30, a = cos(angle*CV_PI/180), b = sin(angle*CV_PI/180); + Mat R = (Mat_(2,2) << a, -b, b, a); + \endcode +*/ +template class MatCommaInitializer_ +{ +public: + //! the constructor, created by "matrix << firstValue" operator, where matrix is cv::Mat + MatCommaInitializer_(Mat_<_Tp>* _m); + //! the operator that takes the next value and put it to the matrix + template MatCommaInitializer_<_Tp>& operator , (T2 v); + //! another form of conversion operator + operator Mat_<_Tp>() const; +protected: + MatIterator_<_Tp> it; +}; + + +/////////////////////////////////////// Mat /////////////////////////////////////////// + +// note that umatdata might be allocated together +// with the matrix data, not as a separate object. +// therefore, it does not have constructor or destructor; +// it should be explicitly initialized using init(). +struct CV_EXPORTS UMatData +{ + enum MemoryFlag { COPY_ON_MAP=1, HOST_COPY_OBSOLETE=2, + DEVICE_COPY_OBSOLETE=4, TEMP_UMAT=8, TEMP_COPIED_UMAT=24, + USER_ALLOCATED=32, DEVICE_MEM_MAPPED=64, + ASYNC_CLEANUP=128 + }; + UMatData(const MatAllocator* allocator); + ~UMatData(); + + // provide atomic access to the structure + void lock(); + void unlock(); + + bool hostCopyObsolete() const; + bool deviceCopyObsolete() const; + bool deviceMemMapped() const; + bool copyOnMap() const; + bool tempUMat() const; + bool tempCopiedUMat() const; + void markHostCopyObsolete(bool flag); + void markDeviceCopyObsolete(bool flag); + void markDeviceMemMapped(bool flag); + + const MatAllocator* prevAllocator; + const MatAllocator* currAllocator; + int urefcount; + int refcount; + uchar* data; + uchar* origdata; + size_t size; + + UMatData::MemoryFlag flags; + void* handle; + void* userdata; + int allocatorFlags_; + int mapcount; + UMatData* originalUMatData; + std::shared_ptr allocatorContext; +}; +CV_ENUM_FLAGS(UMatData::MemoryFlag) + + +struct CV_EXPORTS MatSize +{ + explicit MatSize(int* _p); + int dims() const; + Size operator()() const; + const int& operator[](int i) const; + int& operator[](int i); + operator const int*() const; // TODO OpenCV 4.0: drop this + bool operator == (const MatSize& sz) const; + bool operator != (const MatSize& sz) const; + + int* p; +}; + +struct CV_EXPORTS MatStep +{ + MatStep(); + explicit MatStep(size_t s); + const size_t& operator[](int i) const; + size_t& operator[](int i); + operator size_t() const; + MatStep& operator = (size_t s); + + size_t* p; + size_t buf[2]; +protected: + MatStep& operator = (const MatStep&); +}; + +/** @example samples/cpp/cout_mat.cpp +An example demonstrating the serial out capabilities of cv::Mat +*/ + + /** @brief n-dimensional dense array class \anchor CVMat_Details + +The class Mat represents an n-dimensional dense numerical single-channel or multi-channel array. It +can be used to store real or complex-valued vectors and matrices, grayscale or color images, voxel +volumes, vector fields, point clouds, tensors, histograms (though, very high-dimensional histograms +may be better stored in a SparseMat ). The data layout of the array `M` is defined by the array +`M.step[]`, so that the address of element \f$(i_0,...,i_{M.dims-1})\f$, where \f$0\leq i_k= M.step[i+1]` (in fact, `M.step[i] >= M.step[i+1]*M.size[i+1]` ). This means +that 2-dimensional matrices are stored row-by-row, 3-dimensional matrices are stored plane-by-plane, +and so on. M.step[M.dims-1] is minimal and always equal to the element size M.elemSize() . + +So, the data layout in Mat is compatible with the majority of dense array types from the standard +toolkits and SDKs, such as Numpy (ndarray), Win32 (independent device bitmaps), and others, +that is, with any array that uses *steps* (or *strides*) to compute the position of a pixel. +Due to this compatibility, it is possible to make a Mat header for user-allocated data and process +it in-place using OpenCV functions. + +There are many different ways to create a Mat object. The most popular options are listed below: + +- Use the create(nrows, ncols, type) method or the similar Mat(nrows, ncols, type[, fillValue]) +constructor. A new array of the specified size and type is allocated. type has the same meaning as +in the cvCreateMat method. For example, CV_8UC1 means a 8-bit single-channel array, CV_32FC2 +means a 2-channel (complex) floating-point array, and so on. +@code + // make a 7x7 complex matrix filled with 1+3j. + Mat M(7,7,CV_32FC2,Scalar(1,3)); + // and now turn M to a 100x60 15-channel 8-bit matrix. + // The old content will be deallocated + M.create(100,60,CV_8UC(15)); +@endcode +As noted in the introduction to this chapter, create() allocates only a new array when the shape +or type of the current array are different from the specified ones. + +- Create a multi-dimensional array: +@code + // create a 100x100x100 8-bit array + int sz[] = {100, 100, 100}; + Mat bigCube(3, sz, CV_8U, Scalar::all(0)); +@endcode +It passes the number of dimensions =1 to the Mat constructor but the created array will be +2-dimensional with the number of columns set to 1. So, Mat::dims is always \>= 2 (can also be 0 +when the array is empty). + +- Use a copy constructor or assignment operator where there can be an array or expression on the +right side (see below). As noted in the introduction, the array assignment is an O(1) operation +because it only copies the header and increases the reference counter. The Mat::clone() method can +be used to get a full (deep) copy of the array when you need it. + +- Construct a header for a part of another array. It can be a single row, single column, several +rows, several columns, rectangular region in the array (called a *minor* in algebra) or a +diagonal. Such operations are also O(1) because the new header references the same data. You can +actually modify a part of the array using this feature, for example: +@code + // add the 5-th row, multiplied by 3 to the 3rd row + M.row(3) = M.row(3) + M.row(5)*3; + // now copy the 7-th column to the 1-st column + // M.col(1) = M.col(7); // this will not work + Mat M1 = M.col(1); + M.col(7).copyTo(M1); + // create a new 320x240 image + Mat img(Size(320,240),CV_8UC3); + // select a ROI + Mat roi(img, Rect(10,10,100,100)); + // fill the ROI with (0,255,0) (which is green in RGB space); + // the original 320x240 image will be modified + roi = Scalar(0,255,0); +@endcode +Due to the additional datastart and dataend members, it is possible to compute a relative +sub-array position in the main *container* array using locateROI(): +@code + Mat A = Mat::eye(10, 10, CV_32S); + // extracts A columns, 1 (inclusive) to 3 (exclusive). + Mat B = A(Range::all(), Range(1, 3)); + // extracts B rows, 5 (inclusive) to 9 (exclusive). + // that is, C \~ A(Range(5, 9), Range(1, 3)) + Mat C = B(Range(5, 9), Range::all()); + Size size; Point ofs; + C.locateROI(size, ofs); + // size will be (width=10,height=10) and the ofs will be (x=1, y=5) +@endcode +As in case of whole matrices, if you need a deep copy, use the `clone()` method of the extracted +sub-matrices. + +- Make a header for user-allocated data. It can be useful to do the following: + -# Process "foreign" data using OpenCV (for example, when you implement a DirectShow\* filter or + a processing module for gstreamer, and so on). For example: + @code + Mat process_video_frame(const unsigned char* pixels, + int width, int height, int step) + { + // wrap input buffer + Mat img(height, width, CV_8UC3, (unsigned char*)pixels, step); + + Mat result; + GaussianBlur(img, result, Size(7, 7), 1.5, 1.5); + + return result; + } + @endcode + -# Quickly initialize small matrices and/or get a super-fast element access. + @code + double m[3][3] = {{a, b, c}, {d, e, f}, {g, h, i}}; + Mat M = Mat(3, 3, CV_64F, m).inv(); + @endcode + . + +- Use MATLAB-style array initializers, zeros(), ones(), eye(), for example: +@code + // create a double-precision identity matrix and add it to M. + M += Mat::eye(M.rows, M.cols, CV_64F); +@endcode + +- Use a comma-separated initializer: +@code + // create a 3x3 double-precision identity matrix + Mat M = (Mat_(3,3) << 1, 0, 0, 0, 1, 0, 0, 0, 1); +@endcode +With this approach, you first call a constructor of the Mat class with the proper parameters, and +then you just put `<< operator` followed by comma-separated values that can be constants, +variables, expressions, and so on. Also, note the extra parentheses required to avoid compilation +errors. + +Once the array is created, it is automatically managed via a reference-counting mechanism. If the +array header is built on top of user-allocated data, you should handle the data by yourself. The +array data is deallocated when no one points to it. If you want to release the data pointed by a +array header before the array destructor is called, use Mat::release(). + +The next important thing to learn about the array class is element access. This manual already +described how to compute an address of each array element. Normally, you are not required to use the +formula directly in the code. If you know the array element type (which can be retrieved using the +method Mat::type() ), you can access the element \f$M_{ij}\f$ of a 2-dimensional array as: +@code + M.at(i,j) += 1.f; +@endcode +assuming that `M` is a double-precision floating-point array. There are several variants of the method +at for a different number of dimensions. + +If you need to process a whole row of a 2D array, the most efficient way is to get the pointer to +the row first, and then just use the plain C operator [] : +@code + // compute sum of positive matrix elements + // (assuming that M is a double-precision matrix) + double sum=0; + for(int i = 0; i < M.rows; i++) + { + const double* Mi = M.ptr(i); + for(int j = 0; j < M.cols; j++) + sum += std::max(Mi[j], 0.); + } +@endcode +Some operations, like the one above, do not actually depend on the array shape. They just process +elements of an array one by one (or elements from multiple arrays that have the same coordinates, +for example, array addition). Such operations are called *element-wise*. It makes sense to check +whether all the input/output arrays are continuous, namely, have no gaps at the end of each row. If +yes, process them as a long single row: +@code + // compute the sum of positive matrix elements, optimized variant + double sum=0; + int cols = M.cols, rows = M.rows; + if(M.isContinuous()) + { + cols *= rows; + rows = 1; + } + for(int i = 0; i < rows; i++) + { + const double* Mi = M.ptr(i); + for(int j = 0; j < cols; j++) + sum += std::max(Mi[j], 0.); + } +@endcode +In case of the continuous matrix, the outer loop body is executed just once. So, the overhead is +smaller, which is especially noticeable in case of small matrices. + +Finally, there are STL-style iterators that are smart enough to skip gaps between successive rows: +@code + // compute sum of positive matrix elements, iterator-based variant + double sum=0; + MatConstIterator_ it = M.begin(), it_end = M.end(); + for(; it != it_end; ++it) + sum += std::max(*it, 0.); +@endcode +The matrix iterators are random-access iterators, so they can be passed to any STL algorithm, +including std::sort(). + +@note Matrix Expressions and arithmetic see MatExpr +*/ +class CV_EXPORTS Mat +{ +public: + /** + These are various constructors that form a matrix. As noted in the AutomaticAllocation, often + the default constructor is enough, and the proper matrix will be allocated by an OpenCV function. + The constructed matrix can further be assigned to another matrix or matrix expression or can be + allocated with Mat::create . In the former case, the old content is de-referenced. + */ + Mat(); + + /** @overload + @param rows Number of rows in a 2D array. + @param cols Number of columns in a 2D array. + @param type Array type. Use CV_8UC1, ..., CV_64FC4 to create 1-4 channel matrices, or + CV_8UC(n), ..., CV_64FC(n) to create multi-channel (up to CV_CN_MAX channels) matrices. + */ + Mat(int rows, int cols, int type); + + /** @overload + @param size 2D array size: Size(cols, rows) . In the Size() constructor, the number of rows and the + number of columns go in the reverse order. + @param type Array type. Use CV_8UC1, ..., CV_64FC4 to create 1-4 channel matrices, or + CV_8UC(n), ..., CV_64FC(n) to create multi-channel (up to CV_CN_MAX channels) matrices. + */ + Mat(Size size, int type); + + /** @overload + @param rows Number of rows in a 2D array. + @param cols Number of columns in a 2D array. + @param type Array type. Use CV_8UC1, ..., CV_64FC4 to create 1-4 channel matrices, or + CV_8UC(n), ..., CV_64FC(n) to create multi-channel (up to CV_CN_MAX channels) matrices. + @param s An optional value to initialize each matrix element with. To set all the matrix elements to + the particular value after the construction, use the assignment operator + Mat::operator=(const Scalar& value) . + */ + Mat(int rows, int cols, int type, const Scalar& s); + + /** @overload + @param size 2D array size: Size(cols, rows) . In the Size() constructor, the number of rows and the + number of columns go in the reverse order. + @param type Array type. Use CV_8UC1, ..., CV_64FC4 to create 1-4 channel matrices, or + CV_8UC(n), ..., CV_64FC(n) to create multi-channel (up to CV_CN_MAX channels) matrices. + @param s An optional value to initialize each matrix element with. To set all the matrix elements to + the particular value after the construction, use the assignment operator + Mat::operator=(const Scalar& value) . + */ + Mat(Size size, int type, const Scalar& s); + + /** @overload + @param ndims Array dimensionality. + @param sizes Array of integers specifying an n-dimensional array shape. + @param type Array type. Use CV_8UC1, ..., CV_64FC4 to create 1-4 channel matrices, or + CV_8UC(n), ..., CV_64FC(n) to create multi-channel (up to CV_CN_MAX channels) matrices. + */ + Mat(int ndims, const int* sizes, int type); + + /** @overload + @param sizes Array of integers specifying an n-dimensional array shape. + @param type Array type. Use CV_8UC1, ..., CV_64FC4 to create 1-4 channel matrices, or + CV_8UC(n), ..., CV_64FC(n) to create multi-channel (up to CV_CN_MAX channels) matrices. + */ + Mat(const std::vector& sizes, int type); + + /** @overload + @param ndims Array dimensionality. + @param sizes Array of integers specifying an n-dimensional array shape. + @param type Array type. Use CV_8UC1, ..., CV_64FC4 to create 1-4 channel matrices, or + CV_8UC(n), ..., CV_64FC(n) to create multi-channel (up to CV_CN_MAX channels) matrices. + @param s An optional value to initialize each matrix element with. To set all the matrix elements to + the particular value after the construction, use the assignment operator + Mat::operator=(const Scalar& value) . + */ + Mat(int ndims, const int* sizes, int type, const Scalar& s); + + /** @overload + @param sizes Array of integers specifying an n-dimensional array shape. + @param type Array type. Use CV_8UC1, ..., CV_64FC4 to create 1-4 channel matrices, or + CV_8UC(n), ..., CV_64FC(n) to create multi-channel (up to CV_CN_MAX channels) matrices. + @param s An optional value to initialize each matrix element with. To set all the matrix elements to + the particular value after the construction, use the assignment operator + Mat::operator=(const Scalar& value) . + */ + Mat(const std::vector& sizes, int type, const Scalar& s); + + + /** @overload + @param m Array that (as a whole or partly) is assigned to the constructed matrix. No data is copied + by these constructors. Instead, the header pointing to m data or its sub-array is constructed and + associated with it. The reference counter, if any, is incremented. So, when you modify the matrix + formed using such a constructor, you also modify the corresponding elements of m . If you want to + have an independent copy of the sub-array, use Mat::clone() . + */ + Mat(const Mat& m); + + /** @overload + @param rows Number of rows in a 2D array. + @param cols Number of columns in a 2D array. + @param type Array type. Use CV_8UC1, ..., CV_64FC4 to create 1-4 channel matrices, or + CV_8UC(n), ..., CV_64FC(n) to create multi-channel (up to CV_CN_MAX channels) matrices. + @param data Pointer to the user data. Matrix constructors that take data and step parameters do not + allocate matrix data. Instead, they just initialize the matrix header that points to the specified + data, which means that no data is copied. This operation is very efficient and can be used to + process external data using OpenCV functions. The external data is not automatically deallocated, so + you should take care of it. + @param step Number of bytes each matrix row occupies. The value should include the padding bytes at + the end of each row, if any. If the parameter is missing (set to AUTO_STEP ), no padding is assumed + and the actual step is calculated as cols*elemSize(). See Mat::elemSize. + */ + Mat(int rows, int cols, int type, void* data, size_t step=AUTO_STEP); + + /** @overload + @param size 2D array size: Size(cols, rows) . In the Size() constructor, the number of rows and the + number of columns go in the reverse order. + @param type Array type. Use CV_8UC1, ..., CV_64FC4 to create 1-4 channel matrices, or + CV_8UC(n), ..., CV_64FC(n) to create multi-channel (up to CV_CN_MAX channels) matrices. + @param data Pointer to the user data. Matrix constructors that take data and step parameters do not + allocate matrix data. Instead, they just initialize the matrix header that points to the specified + data, which means that no data is copied. This operation is very efficient and can be used to + process external data using OpenCV functions. The external data is not automatically deallocated, so + you should take care of it. + @param step Number of bytes each matrix row occupies. The value should include the padding bytes at + the end of each row, if any. If the parameter is missing (set to AUTO_STEP ), no padding is assumed + and the actual step is calculated as cols*elemSize(). See Mat::elemSize. + */ + Mat(Size size, int type, void* data, size_t step=AUTO_STEP); + + /** @overload + @param ndims Array dimensionality. + @param sizes Array of integers specifying an n-dimensional array shape. + @param type Array type. Use CV_8UC1, ..., CV_64FC4 to create 1-4 channel matrices, or + CV_8UC(n), ..., CV_64FC(n) to create multi-channel (up to CV_CN_MAX channels) matrices. + @param data Pointer to the user data. Matrix constructors that take data and step parameters do not + allocate matrix data. Instead, they just initialize the matrix header that points to the specified + data, which means that no data is copied. This operation is very efficient and can be used to + process external data using OpenCV functions. The external data is not automatically deallocated, so + you should take care of it. + @param steps Array of ndims-1 steps in case of a multi-dimensional array (the last step is always + set to the element size). If not specified, the matrix is assumed to be continuous. + */ + Mat(int ndims, const int* sizes, int type, void* data, const size_t* steps=0); + + /** @overload + @param sizes Array of integers specifying an n-dimensional array shape. + @param type Array type. Use CV_8UC1, ..., CV_64FC4 to create 1-4 channel matrices, or + CV_8UC(n), ..., CV_64FC(n) to create multi-channel (up to CV_CN_MAX channels) matrices. + @param data Pointer to the user data. Matrix constructors that take data and step parameters do not + allocate matrix data. Instead, they just initialize the matrix header that points to the specified + data, which means that no data is copied. This operation is very efficient and can be used to + process external data using OpenCV functions. The external data is not automatically deallocated, so + you should take care of it. + @param steps Array of ndims-1 steps in case of a multi-dimensional array (the last step is always + set to the element size). If not specified, the matrix is assumed to be continuous. + */ + Mat(const std::vector& sizes, int type, void* data, const size_t* steps=0); + + /** @overload + @param m Array that (as a whole or partly) is assigned to the constructed matrix. No data is copied + by these constructors. Instead, the header pointing to m data or its sub-array is constructed and + associated with it. The reference counter, if any, is incremented. So, when you modify the matrix + formed using such a constructor, you also modify the corresponding elements of m . If you want to + have an independent copy of the sub-array, use Mat::clone() . + @param rowRange Range of the m rows to take. As usual, the range start is inclusive and the range + end is exclusive. Use Range::all() to take all the rows. + @param colRange Range of the m columns to take. Use Range::all() to take all the columns. + */ + Mat(const Mat& m, const Range& rowRange, const Range& colRange=Range::all()); + + /** @overload + @param m Array that (as a whole or partly) is assigned to the constructed matrix. No data is copied + by these constructors. Instead, the header pointing to m data or its sub-array is constructed and + associated with it. The reference counter, if any, is incremented. So, when you modify the matrix + formed using such a constructor, you also modify the corresponding elements of m . If you want to + have an independent copy of the sub-array, use Mat::clone() . + @param roi Region of interest. + */ + Mat(const Mat& m, const Rect& roi); + + /** @overload + @param m Array that (as a whole or partly) is assigned to the constructed matrix. No data is copied + by these constructors. Instead, the header pointing to m data or its sub-array is constructed and + associated with it. The reference counter, if any, is incremented. So, when you modify the matrix + formed using such a constructor, you also modify the corresponding elements of m . If you want to + have an independent copy of the sub-array, use Mat::clone() . + @param ranges Array of selected ranges of m along each dimensionality. + */ + Mat(const Mat& m, const Range* ranges); + + /** @overload + @param m Array that (as a whole or partly) is assigned to the constructed matrix. No data is copied + by these constructors. Instead, the header pointing to m data or its sub-array is constructed and + associated with it. The reference counter, if any, is incremented. So, when you modify the matrix + formed using such a constructor, you also modify the corresponding elements of m . If you want to + have an independent copy of the sub-array, use Mat::clone() . + @param ranges Array of selected ranges of m along each dimensionality. + */ + Mat(const Mat& m, const std::vector& ranges); + + /** @overload + @param vec STL vector whose elements form the matrix. The matrix has a single column and the number + of rows equal to the number of vector elements. Type of the matrix matches the type of vector + elements. The constructor can handle arbitrary types, for which there is a properly declared + DataType . This means that the vector elements must be primitive numbers or uni-type numerical + tuples of numbers. Mixed-type structures are not supported. The corresponding constructor is + explicit. Since STL vectors are not automatically converted to Mat instances, you should write + Mat(vec) explicitly. Unless you copy the data into the matrix ( copyData=true ), no new elements + will be added to the vector because it can potentially yield vector data reallocation, and, thus, + the matrix data pointer will be invalid. + @param copyData Flag to specify whether the underlying data of the STL vector should be copied + to (true) or shared with (false) the newly constructed matrix. When the data is copied, the + allocated buffer is managed using Mat reference counting mechanism. While the data is shared, + the reference counter is NULL, and you should not deallocate the data until the matrix is not + destructed. + */ + template explicit Mat(const std::vector<_Tp>& vec, bool copyData=false); + + /** @overload + */ + template::value>::type> + explicit Mat(const std::initializer_list<_Tp> list); + + /** @overload + */ + template explicit Mat(const std::initializer_list sizes, const std::initializer_list<_Tp> list); + + /** @overload + */ + template explicit Mat(const std::array<_Tp, _Nm>& arr, bool copyData=false); + + /** @overload + */ + template explicit Mat(const Vec<_Tp, n>& vec, bool copyData=true); + + /** @overload + */ + template explicit Mat(const Matx<_Tp, m, n>& mtx, bool copyData=true); + + /** @overload + */ + template explicit Mat(const Point_<_Tp>& pt, bool copyData=true); + + /** @overload + */ + template explicit Mat(const Point3_<_Tp>& pt, bool copyData=true); + + /** @overload + */ + template explicit Mat(const MatCommaInitializer_<_Tp>& commaInitializer); + + //! download data from GpuMat + explicit Mat(const cuda::GpuMat& m); + + //! destructor - calls release() + ~Mat(); + + /** @brief assignment operators + + These are available assignment operators. Since they all are very different, make sure to read the + operator parameters description. + @param m Assigned, right-hand-side matrix. Matrix assignment is an O(1) operation. This means that + no data is copied but the data is shared and the reference counter, if any, is incremented. Before + assigning new data, the old data is de-referenced via Mat::release . + */ + Mat& operator = (const Mat& m); + + /** @overload + @param expr Assigned matrix expression object. As opposite to the first form of the assignment + operation, the second form can reuse already allocated matrix if it has the right size and type to + fit the matrix expression result. It is automatically handled by the real function that the matrix + expressions is expanded to. For example, C=A+B is expanded to add(A, B, C), and add takes care of + automatic C reallocation. + */ + Mat& operator = (const MatExpr& expr); + + //! retrieve UMat from Mat + UMat getUMat(AccessFlag accessFlags, UMatUsageFlags usageFlags = USAGE_DEFAULT) const; + + /** @brief Creates a matrix header for the specified matrix row. + + The method makes a new header for the specified matrix row and returns it. This is an O(1) + operation, regardless of the matrix size. The underlying data of the new matrix is shared with the + original matrix. Here is the example of one of the classical basic matrix processing operations, + axpy, used by LU and many other algorithms: + @code + inline void matrix_axpy(Mat& A, int i, int j, double alpha) + { + A.row(i) += A.row(j)*alpha; + } + @endcode + @note In the current implementation, the following code does not work as expected: + @code + Mat A; + ... + A.row(i) = A.row(j); // will not work + @endcode + This happens because A.row(i) forms a temporary header that is further assigned to another header. + Remember that each of these operations is O(1), that is, no data is copied. Thus, the above + assignment is not true if you may have expected the j-th row to be copied to the i-th row. To + achieve that, you should either turn this simple assignment into an expression or use the + Mat::copyTo method: + @code + Mat A; + ... + // works, but looks a bit obscure. + A.row(i) = A.row(j) + 0; + // this is a bit longer, but the recommended method. + A.row(j).copyTo(A.row(i)); + @endcode + @param y A 0-based row index. + */ + Mat row(int y) const; + + /** @brief Creates a matrix header for the specified matrix column. + + The method makes a new header for the specified matrix column and returns it. This is an O(1) + operation, regardless of the matrix size. The underlying data of the new matrix is shared with the + original matrix. See also the Mat::row description. + @param x A 0-based column index. + */ + Mat col(int x) const; + + /** @brief Creates a matrix header for the specified row span. + + The method makes a new header for the specified row span of the matrix. Similarly to Mat::row and + Mat::col , this is an O(1) operation. + @param startrow An inclusive 0-based start index of the row span. + @param endrow An exclusive 0-based ending index of the row span. + */ + Mat rowRange(int startrow, int endrow) const; + + /** @overload + @param r Range structure containing both the start and the end indices. + */ + Mat rowRange(const Range& r) const; + + /** @brief Creates a matrix header for the specified column span. + + The method makes a new header for the specified column span of the matrix. Similarly to Mat::row and + Mat::col , this is an O(1) operation. + @param startcol An inclusive 0-based start index of the column span. + @param endcol An exclusive 0-based ending index of the column span. + */ + Mat colRange(int startcol, int endcol) const; + + /** @overload + @param r Range structure containing both the start and the end indices. + */ + Mat colRange(const Range& r) const; + + /** @brief Extracts a diagonal from a matrix + + The method makes a new header for the specified matrix diagonal. The new matrix is represented as a + single-column matrix. Similarly to Mat::row and Mat::col, this is an O(1) operation. + @param d index of the diagonal, with the following values: + - `d=0` is the main diagonal. + - `d<0` is a diagonal from the lower half. For example, d=-1 means the diagonal is set + immediately below the main one. + - `d>0` is a diagonal from the upper half. For example, d=1 means the diagonal is set + immediately above the main one. + For example: + @code + Mat m = (Mat_(3,3) << + 1,2,3, + 4,5,6, + 7,8,9); + Mat d0 = m.diag(0); + Mat d1 = m.diag(1); + Mat d_1 = m.diag(-1); + @endcode + The resulting matrices are + @code + d0 = + [1; + 5; + 9] + d1 = + [2; + 6] + d_1 = + [4; + 8] + @endcode + */ + Mat diag(int d=0) const; + + /** @brief creates a diagonal matrix + + The method creates a square diagonal matrix from specified main diagonal. + @param d One-dimensional matrix that represents the main diagonal. + */ + static Mat diag(const Mat& d); + + /** @brief Creates a full copy of the array and the underlying data. + + The method creates a full copy of the array. The original step[] is not taken into account. So, the + array copy is a continuous array occupying total()*elemSize() bytes. + */ + Mat clone() const CV_NODISCARD; + + /** @brief Copies the matrix to another one. + + The method copies the matrix data to another matrix. Before copying the data, the method invokes : + @code + m.create(this->size(), this->type()); + @endcode + so that the destination matrix is reallocated if needed. While m.copyTo(m); works flawlessly, the + function does not handle the case of a partial overlap between the source and the destination + matrices. + + When the operation mask is specified, if the Mat::create call shown above reallocates the matrix, + the newly allocated matrix is initialized with all zeros before copying the data. + @param m Destination matrix. If it does not have a proper size or type before the operation, it is + reallocated. + */ + void copyTo( OutputArray m ) const; + + /** @overload + @param m Destination matrix. If it does not have a proper size or type before the operation, it is + reallocated. + @param mask Operation mask of the same size as \*this. Its non-zero elements indicate which matrix + elements need to be copied. The mask has to be of type CV_8U and can have 1 or multiple channels. + */ + void copyTo( OutputArray m, InputArray mask ) const; + + /** @brief Converts an array to another data type with optional scaling. + + The method converts source pixel values to the target data type. saturate_cast\<\> is applied at + the end to avoid possible overflows: + + \f[m(x,y) = saturate \_ cast( \alpha (*this)(x,y) + \beta )\f] + @param m output matrix; if it does not have a proper size or type before the operation, it is + reallocated. + @param rtype desired output matrix type or, rather, the depth since the number of channels are the + same as the input has; if rtype is negative, the output matrix will have the same type as the input. + @param alpha optional scale factor. + @param beta optional delta added to the scaled values. + */ + void convertTo( OutputArray m, int rtype, double alpha=1, double beta=0 ) const; + + /** @brief Provides a functional form of convertTo. + + This is an internally used method called by the @ref MatrixExpressions engine. + @param m Destination array. + @param type Desired destination array depth (or -1 if it should be the same as the source type). + */ + void assignTo( Mat& m, int type=-1 ) const; + + /** @brief Sets all or some of the array elements to the specified value. + @param s Assigned scalar converted to the actual array type. + */ + Mat& operator = (const Scalar& s); + + /** @brief Sets all or some of the array elements to the specified value. + + This is an advanced variant of the Mat::operator=(const Scalar& s) operator. + @param value Assigned scalar converted to the actual array type. + @param mask Operation mask of the same size as \*this. Its non-zero elements indicate which matrix + elements need to be copied. The mask has to be of type CV_8U and can have 1 or multiple channels + */ + Mat& setTo(InputArray value, InputArray mask=noArray()); + + /** @brief Changes the shape and/or the number of channels of a 2D matrix without copying the data. + + The method makes a new matrix header for \*this elements. The new matrix may have a different size + and/or different number of channels. Any combination is possible if: + - No extra elements are included into the new matrix and no elements are excluded. Consequently, + the product rows\*cols\*channels() must stay the same after the transformation. + - No data is copied. That is, this is an O(1) operation. Consequently, if you change the number of + rows, or the operation changes the indices of elements row in some other way, the matrix must be + continuous. See Mat::isContinuous . + + For example, if there is a set of 3D points stored as an STL vector, and you want to represent the + points as a 3xN matrix, do the following: + @code + std::vector vec; + ... + Mat pointMat = Mat(vec). // convert vector to Mat, O(1) operation + reshape(1). // make Nx3 1-channel matrix out of Nx1 3-channel. + // Also, an O(1) operation + t(); // finally, transpose the Nx3 matrix. + // This involves copying all the elements + @endcode + @param cn New number of channels. If the parameter is 0, the number of channels remains the same. + @param rows New number of rows. If the parameter is 0, the number of rows remains the same. + */ + Mat reshape(int cn, int rows=0) const; + + /** @overload */ + Mat reshape(int cn, int newndims, const int* newsz) const; + + /** @overload */ + Mat reshape(int cn, const std::vector& newshape) const; + + /** @brief Transposes a matrix. + + The method performs matrix transposition by means of matrix expressions. It does not perform the + actual transposition but returns a temporary matrix transposition object that can be further used as + a part of more complex matrix expressions or can be assigned to a matrix: + @code + Mat A1 = A + Mat::eye(A.size(), A.type())*lambda; + Mat C = A1.t()*A1; // compute (A + lambda*I)^t * (A + lamda*I) + @endcode + */ + MatExpr t() const; + + /** @brief Inverses a matrix. + + The method performs a matrix inversion by means of matrix expressions. This means that a temporary + matrix inversion object is returned by the method and can be used further as a part of more complex + matrix expressions or can be assigned to a matrix. + @param method Matrix inversion method. One of cv::DecompTypes + */ + MatExpr inv(int method=DECOMP_LU) const; + + /** @brief Performs an element-wise multiplication or division of the two matrices. + + The method returns a temporary object encoding per-element array multiplication, with optional + scale. Note that this is not a matrix multiplication that corresponds to a simpler "\*" operator. + + Example: + @code + Mat C = A.mul(5/B); // equivalent to divide(A, B, C, 5) + @endcode + @param m Another array of the same type and the same size as \*this, or a matrix expression. + @param scale Optional scale factor. + */ + MatExpr mul(InputArray m, double scale=1) const; + + /** @brief Computes a cross-product of two 3-element vectors. + + The method computes a cross-product of two 3-element vectors. The vectors must be 3-element + floating-point vectors of the same shape and size. The result is another 3-element vector of the + same shape and type as operands. + @param m Another cross-product operand. + */ + Mat cross(InputArray m) const; + + /** @brief Computes a dot-product of two vectors. + + The method computes a dot-product of two matrices. If the matrices are not single-column or + single-row vectors, the top-to-bottom left-to-right scan ordering is used to treat them as 1D + vectors. The vectors must have the same size and type. If the matrices have more than one channel, + the dot products from all the channels are summed together. + @param m another dot-product operand. + */ + double dot(InputArray m) const; + + /** @brief Returns a zero array of the specified size and type. + + The method returns a Matlab-style zero array initializer. It can be used to quickly form a constant + array as a function parameter, part of a matrix expression, or as a matrix initializer: + @code + Mat A; + A = Mat::zeros(3, 3, CV_32F); + @endcode + In the example above, a new matrix is allocated only if A is not a 3x3 floating-point matrix. + Otherwise, the existing matrix A is filled with zeros. + @param rows Number of rows. + @param cols Number of columns. + @param type Created matrix type. + */ + static MatExpr zeros(int rows, int cols, int type); + + /** @overload + @param size Alternative to the matrix size specification Size(cols, rows) . + @param type Created matrix type. + */ + static MatExpr zeros(Size size, int type); + + /** @overload + @param ndims Array dimensionality. + @param sz Array of integers specifying the array shape. + @param type Created matrix type. + */ + static MatExpr zeros(int ndims, const int* sz, int type); + + /** @brief Returns an array of all 1's of the specified size and type. + + The method returns a Matlab-style 1's array initializer, similarly to Mat::zeros. Note that using + this method you can initialize an array with an arbitrary value, using the following Matlab idiom: + @code + Mat A = Mat::ones(100, 100, CV_8U)*3; // make 100x100 matrix filled with 3. + @endcode + The above operation does not form a 100x100 matrix of 1's and then multiply it by 3. Instead, it + just remembers the scale factor (3 in this case) and use it when actually invoking the matrix + initializer. + @note In case of multi-channels type, only the first channel will be initialized with 1's, the + others will be set to 0's. + @param rows Number of rows. + @param cols Number of columns. + @param type Created matrix type. + */ + static MatExpr ones(int rows, int cols, int type); + + /** @overload + @param size Alternative to the matrix size specification Size(cols, rows) . + @param type Created matrix type. + */ + static MatExpr ones(Size size, int type); + + /** @overload + @param ndims Array dimensionality. + @param sz Array of integers specifying the array shape. + @param type Created matrix type. + */ + static MatExpr ones(int ndims, const int* sz, int type); + + /** @brief Returns an identity matrix of the specified size and type. + + The method returns a Matlab-style identity matrix initializer, similarly to Mat::zeros. Similarly to + Mat::ones, you can use a scale operation to create a scaled identity matrix efficiently: + @code + // make a 4x4 diagonal matrix with 0.1's on the diagonal. + Mat A = Mat::eye(4, 4, CV_32F)*0.1; + @endcode + @note In case of multi-channels type, identity matrix will be initialized only for the first channel, + the others will be set to 0's + @param rows Number of rows. + @param cols Number of columns. + @param type Created matrix type. + */ + static MatExpr eye(int rows, int cols, int type); + + /** @overload + @param size Alternative matrix size specification as Size(cols, rows) . + @param type Created matrix type. + */ + static MatExpr eye(Size size, int type); + + /** @brief Allocates new array data if needed. + + This is one of the key Mat methods. Most new-style OpenCV functions and methods that produce arrays + call this method for each output array. The method uses the following algorithm: + + -# If the current array shape and the type match the new ones, return immediately. Otherwise, + de-reference the previous data by calling Mat::release. + -# Initialize the new header. + -# Allocate the new data of total()\*elemSize() bytes. + -# Allocate the new, associated with the data, reference counter and set it to 1. + + Such a scheme makes the memory management robust and efficient at the same time and helps avoid + extra typing for you. This means that usually there is no need to explicitly allocate output arrays. + That is, instead of writing: + @code + Mat color; + ... + Mat gray(color.rows, color.cols, color.depth()); + cvtColor(color, gray, COLOR_BGR2GRAY); + @endcode + you can simply write: + @code + Mat color; + ... + Mat gray; + cvtColor(color, gray, COLOR_BGR2GRAY); + @endcode + because cvtColor, as well as the most of OpenCV functions, calls Mat::create() for the output array + internally. + @param rows New number of rows. + @param cols New number of columns. + @param type New matrix type. + */ + void create(int rows, int cols, int type); + + /** @overload + @param size Alternative new matrix size specification: Size(cols, rows) + @param type New matrix type. + */ + void create(Size size, int type); + + /** @overload + @param ndims New array dimensionality. + @param sizes Array of integers specifying a new array shape. + @param type New matrix type. + */ + void create(int ndims, const int* sizes, int type); + + /** @overload + @param sizes Array of integers specifying a new array shape. + @param type New matrix type. + */ + void create(const std::vector& sizes, int type); + + /** @brief Increments the reference counter. + + The method increments the reference counter associated with the matrix data. If the matrix header + points to an external data set (see Mat::Mat ), the reference counter is NULL, and the method has no + effect in this case. Normally, to avoid memory leaks, the method should not be called explicitly. It + is called implicitly by the matrix assignment operator. The reference counter increment is an atomic + operation on the platforms that support it. Thus, it is safe to operate on the same matrices + asynchronously in different threads. + */ + void addref(); + + /** @brief Decrements the reference counter and deallocates the matrix if needed. + + The method decrements the reference counter associated with the matrix data. When the reference + counter reaches 0, the matrix data is deallocated and the data and the reference counter pointers + are set to NULL's. If the matrix header points to an external data set (see Mat::Mat ), the + reference counter is NULL, and the method has no effect in this case. + + This method can be called manually to force the matrix data deallocation. But since this method is + automatically called in the destructor, or by any other method that changes the data pointer, it is + usually not needed. The reference counter decrement and check for 0 is an atomic operation on the + platforms that support it. Thus, it is safe to operate on the same matrices asynchronously in + different threads. + */ + void release(); + + //! internal use function, consider to use 'release' method instead; deallocates the matrix data + void deallocate(); + //! internal use function; properly re-allocates _size, _step arrays + void copySize(const Mat& m); + + /** @brief Reserves space for the certain number of rows. + + The method reserves space for sz rows. If the matrix already has enough space to store sz rows, + nothing happens. If the matrix is reallocated, the first Mat::rows rows are preserved. The method + emulates the corresponding method of the STL vector class. + @param sz Number of rows. + */ + void reserve(size_t sz); + + /** @brief Reserves space for the certain number of bytes. + + The method reserves space for sz bytes. If the matrix already has enough space to store sz bytes, + nothing happens. If matrix has to be reallocated its previous content could be lost. + @param sz Number of bytes. + */ + void reserveBuffer(size_t sz); + + /** @brief Changes the number of matrix rows. + + The methods change the number of matrix rows. If the matrix is reallocated, the first + min(Mat::rows, sz) rows are preserved. The methods emulate the corresponding methods of the STL + vector class. + @param sz New number of rows. + */ + void resize(size_t sz); + + /** @overload + @param sz New number of rows. + @param s Value assigned to the newly added elements. + */ + void resize(size_t sz, const Scalar& s); + + //! internal function + void push_back_(const void* elem); + + /** @brief Adds elements to the bottom of the matrix. + + The methods add one or more elements to the bottom of the matrix. They emulate the corresponding + method of the STL vector class. When elem is Mat , its type and the number of columns must be the + same as in the container matrix. + @param elem Added element(s). + */ + template void push_back(const _Tp& elem); + + /** @overload + @param elem Added element(s). + */ + template void push_back(const Mat_<_Tp>& elem); + + /** @overload + @param elem Added element(s). + */ + template void push_back(const std::vector<_Tp>& elem); + + /** @overload + @param m Added line(s). + */ + void push_back(const Mat& m); + + /** @brief Removes elements from the bottom of the matrix. + + The method removes one or more rows from the bottom of the matrix. + @param nelems Number of removed rows. If it is greater than the total number of rows, an exception + is thrown. + */ + void pop_back(size_t nelems=1); + + /** @brief Locates the matrix header within a parent matrix. + + After you extracted a submatrix from a matrix using Mat::row, Mat::col, Mat::rowRange, + Mat::colRange, and others, the resultant submatrix points just to the part of the original big + matrix. However, each submatrix contains information (represented by datastart and dataend + fields) that helps reconstruct the original matrix size and the position of the extracted + submatrix within the original matrix. The method locateROI does exactly that. + @param wholeSize Output parameter that contains the size of the whole matrix containing *this* + as a part. + @param ofs Output parameter that contains an offset of *this* inside the whole matrix. + */ + void locateROI( Size& wholeSize, Point& ofs ) const; + + /** @brief Adjusts a submatrix size and position within the parent matrix. + + The method is complimentary to Mat::locateROI . The typical use of these functions is to determine + the submatrix position within the parent matrix and then shift the position somehow. Typically, it + can be required for filtering operations when pixels outside of the ROI should be taken into + account. When all the method parameters are positive, the ROI needs to grow in all directions by the + specified amount, for example: + @code + A.adjustROI(2, 2, 2, 2); + @endcode + In this example, the matrix size is increased by 4 elements in each direction. The matrix is shifted + by 2 elements to the left and 2 elements up, which brings in all the necessary pixels for the + filtering with the 5x5 kernel. + + adjustROI forces the adjusted ROI to be inside of the parent matrix that is boundaries of the + adjusted ROI are constrained by boundaries of the parent matrix. For example, if the submatrix A is + located in the first row of a parent matrix and you called A.adjustROI(2, 2, 2, 2) then A will not + be increased in the upward direction. + + The function is used internally by the OpenCV filtering functions, like filter2D , morphological + operations, and so on. + @param dtop Shift of the top submatrix boundary upwards. + @param dbottom Shift of the bottom submatrix boundary downwards. + @param dleft Shift of the left submatrix boundary to the left. + @param dright Shift of the right submatrix boundary to the right. + @sa copyMakeBorder + */ + Mat& adjustROI( int dtop, int dbottom, int dleft, int dright ); + + /** @brief Extracts a rectangular submatrix. + + The operators make a new header for the specified sub-array of \*this . They are the most + generalized forms of Mat::row, Mat::col, Mat::rowRange, and Mat::colRange . For example, + `A(Range(0, 10), Range::all())` is equivalent to `A.rowRange(0, 10)`. Similarly to all of the above, + the operators are O(1) operations, that is, no matrix data is copied. + @param rowRange Start and end row of the extracted submatrix. The upper boundary is not included. To + select all the rows, use Range::all(). + @param colRange Start and end column of the extracted submatrix. The upper boundary is not included. + To select all the columns, use Range::all(). + */ + Mat operator()( Range rowRange, Range colRange ) const; + + /** @overload + @param roi Extracted submatrix specified as a rectangle. + */ + Mat operator()( const Rect& roi ) const; + + /** @overload + @param ranges Array of selected ranges along each array dimension. + */ + Mat operator()( const Range* ranges ) const; + + /** @overload + @param ranges Array of selected ranges along each array dimension. + */ + Mat operator()(const std::vector& ranges) const; + + template operator std::vector<_Tp>() const; + template operator Vec<_Tp, n>() const; + template operator Matx<_Tp, m, n>() const; + + template operator std::array<_Tp, _Nm>() const; + + /** @brief Reports whether the matrix is continuous or not. + + The method returns true if the matrix elements are stored continuously without gaps at the end of + each row. Otherwise, it returns false. Obviously, 1x1 or 1xN matrices are always continuous. + Matrices created with Mat::create are always continuous. But if you extract a part of the matrix + using Mat::col, Mat::diag, and so on, or constructed a matrix header for externally allocated data, + such matrices may no longer have this property. + + The continuity flag is stored as a bit in the Mat::flags field and is computed automatically when + you construct a matrix header. Thus, the continuity check is a very fast operation, though + theoretically it could be done as follows: + @code + // alternative implementation of Mat::isContinuous() + bool myCheckMatContinuity(const Mat& m) + { + //return (m.flags & Mat::CONTINUOUS_FLAG) != 0; + return m.rows == 1 || m.step == m.cols*m.elemSize(); + } + @endcode + The method is used in quite a few of OpenCV functions. The point is that element-wise operations + (such as arithmetic and logical operations, math functions, alpha blending, color space + transformations, and others) do not depend on the image geometry. Thus, if all the input and output + arrays are continuous, the functions can process them as very long single-row vectors. The example + below illustrates how an alpha-blending function can be implemented: + @code + template + void alphaBlendRGBA(const Mat& src1, const Mat& src2, Mat& dst) + { + const float alpha_scale = (float)std::numeric_limits::max(), + inv_scale = 1.f/alpha_scale; + + CV_Assert( src1.type() == src2.type() && + src1.type() == CV_MAKETYPE(traits::Depth::value, 4) && + src1.size() == src2.size()); + Size size = src1.size(); + dst.create(size, src1.type()); + + // here is the idiom: check the arrays for continuity and, + // if this is the case, + // treat the arrays as 1D vectors + if( src1.isContinuous() && src2.isContinuous() && dst.isContinuous() ) + { + size.width *= size.height; + size.height = 1; + } + size.width *= 4; + + for( int i = 0; i < size.height; i++ ) + { + // when the arrays are continuous, + // the outer loop is executed only once + const T* ptr1 = src1.ptr(i); + const T* ptr2 = src2.ptr(i); + T* dptr = dst.ptr(i); + + for( int j = 0; j < size.width; j += 4 ) + { + float alpha = ptr1[j+3]*inv_scale, beta = ptr2[j+3]*inv_scale; + dptr[j] = saturate_cast(ptr1[j]*alpha + ptr2[j]*beta); + dptr[j+1] = saturate_cast(ptr1[j+1]*alpha + ptr2[j+1]*beta); + dptr[j+2] = saturate_cast(ptr1[j+2]*alpha + ptr2[j+2]*beta); + dptr[j+3] = saturate_cast((1 - (1-alpha)*(1-beta))*alpha_scale); + } + } + } + @endcode + This approach, while being very simple, can boost the performance of a simple element-operation by + 10-20 percents, especially if the image is rather small and the operation is quite simple. + + Another OpenCV idiom in this function, a call of Mat::create for the destination array, that + allocates the destination array unless it already has the proper size and type. And while the newly + allocated arrays are always continuous, you still need to check the destination array because + Mat::create does not always allocate a new matrix. + */ + bool isContinuous() const; + + //! returns true if the matrix is a submatrix of another matrix + bool isSubmatrix() const; + + /** @brief Returns the matrix element size in bytes. + + The method returns the matrix element size in bytes. For example, if the matrix type is CV_16SC3 , + the method returns 3\*sizeof(short) or 6. + */ + size_t elemSize() const; + + /** @brief Returns the size of each matrix element channel in bytes. + + The method returns the matrix element channel size in bytes, that is, it ignores the number of + channels. For example, if the matrix type is CV_16SC3 , the method returns sizeof(short) or 2. + */ + size_t elemSize1() const; + + /** @brief Returns the type of a matrix element. + + The method returns a matrix element type. This is an identifier compatible with the CvMat type + system, like CV_16SC3 or 16-bit signed 3-channel array, and so on. + */ + int type() const; + + /** @brief Returns the depth of a matrix element. + + The method returns the identifier of the matrix element depth (the type of each individual channel). + For example, for a 16-bit signed element array, the method returns CV_16S . A complete list of + matrix types contains the following values: + - CV_8U - 8-bit unsigned integers ( 0..255 ) + - CV_8S - 8-bit signed integers ( -128..127 ) + - CV_16U - 16-bit unsigned integers ( 0..65535 ) + - CV_16S - 16-bit signed integers ( -32768..32767 ) + - CV_32S - 32-bit signed integers ( -2147483648..2147483647 ) + - CV_32F - 32-bit floating-point numbers ( -FLT_MAX..FLT_MAX, INF, NAN ) + - CV_64F - 64-bit floating-point numbers ( -DBL_MAX..DBL_MAX, INF, NAN ) + */ + int depth() const; + + /** @brief Returns the number of matrix channels. + + The method returns the number of matrix channels. + */ + int channels() const; + + /** @brief Returns a normalized step. + + The method returns a matrix step divided by Mat::elemSize1() . It can be useful to quickly access an + arbitrary matrix element. + */ + size_t step1(int i=0) const; + + /** @brief Returns true if the array has no elements. + + The method returns true if Mat::total() is 0 or if Mat::data is NULL. Because of pop_back() and + resize() methods `M.total() == 0` does not imply that `M.data == NULL`. + */ + bool empty() const; + + /** @brief Returns the total number of array elements. + + The method returns the number of array elements (a number of pixels if the array represents an + image). + */ + size_t total() const; + + /** @brief Returns the total number of array elements. + + The method returns the number of elements within a certain sub-array slice with startDim <= dim < endDim + */ + size_t total(int startDim, int endDim=INT_MAX) const; + + /** + * @param elemChannels Number of channels or number of columns the matrix should have. + * For a 2-D matrix, when the matrix has only 1 column, then it should have + * elemChannels channels; When the matrix has only 1 channel, + * then it should have elemChannels columns. + * For a 3-D matrix, it should have only one channel. Furthermore, + * if the number of planes is not one, then the number of rows + * within every plane has to be 1; if the number of rows within + * every plane is not 1, then the number of planes has to be 1. + * @param depth The depth the matrix should have. Set it to -1 when any depth is fine. + * @param requireContinuous Set it to true to require the matrix to be continuous + * @return -1 if the requirement is not satisfied. + * Otherwise, it returns the number of elements in the matrix. Note + * that an element may have multiple channels. + * + * The following code demonstrates its usage for a 2-d matrix: + * @snippet snippets/core_mat_checkVector.cpp example-2d + * + * The following code demonstrates its usage for a 3-d matrix: + * @snippet snippets/core_mat_checkVector.cpp example-3d + */ + int checkVector(int elemChannels, int depth=-1, bool requireContinuous=true) const; + + /** @brief Returns a pointer to the specified matrix row. + + The methods return `uchar*` or typed pointer to the specified matrix row. See the sample in + Mat::isContinuous to know how to use these methods. + @param i0 A 0-based row index. + */ + uchar* ptr(int i0=0); + /** @overload */ + const uchar* ptr(int i0=0) const; + + /** @overload + @param row Index along the dimension 0 + @param col Index along the dimension 1 + */ + uchar* ptr(int row, int col); + /** @overload + @param row Index along the dimension 0 + @param col Index along the dimension 1 + */ + const uchar* ptr(int row, int col) const; + + /** @overload */ + uchar* ptr(int i0, int i1, int i2); + /** @overload */ + const uchar* ptr(int i0, int i1, int i2) const; + + /** @overload */ + uchar* ptr(const int* idx); + /** @overload */ + const uchar* ptr(const int* idx) const; + /** @overload */ + template uchar* ptr(const Vec& idx); + /** @overload */ + template const uchar* ptr(const Vec& idx) const; + + /** @overload */ + template _Tp* ptr(int i0=0); + /** @overload */ + template const _Tp* ptr(int i0=0) const; + /** @overload + @param row Index along the dimension 0 + @param col Index along the dimension 1 + */ + template _Tp* ptr(int row, int col); + /** @overload + @param row Index along the dimension 0 + @param col Index along the dimension 1 + */ + template const _Tp* ptr(int row, int col) const; + /** @overload */ + template _Tp* ptr(int i0, int i1, int i2); + /** @overload */ + template const _Tp* ptr(int i0, int i1, int i2) const; + /** @overload */ + template _Tp* ptr(const int* idx); + /** @overload */ + template const _Tp* ptr(const int* idx) const; + /** @overload */ + template _Tp* ptr(const Vec& idx); + /** @overload */ + template const _Tp* ptr(const Vec& idx) const; + + /** @brief Returns a reference to the specified array element. + + The template methods return a reference to the specified array element. For the sake of higher + performance, the index range checks are only performed in the Debug configuration. + + Note that the variants with a single index (i) can be used to access elements of single-row or + single-column 2-dimensional arrays. That is, if, for example, A is a 1 x N floating-point matrix and + B is an M x 1 integer matrix, you can simply write `A.at(k+4)` and `B.at(2*i+1)` + instead of `A.at(0,k+4)` and `B.at(2*i+1,0)`, respectively. + + The example below initializes a Hilbert matrix: + @code + Mat H(100, 100, CV_64F); + for(int i = 0; i < H.rows; i++) + for(int j = 0; j < H.cols; j++) + H.at(i,j)=1./(i+j+1); + @endcode + + Keep in mind that the size identifier used in the at operator cannot be chosen at random. It depends + on the image from which you are trying to retrieve the data. The table below gives a better insight in this: + - If matrix is of type `CV_8U` then use `Mat.at(y,x)`. + - If matrix is of type `CV_8S` then use `Mat.at(y,x)`. + - If matrix is of type `CV_16U` then use `Mat.at(y,x)`. + - If matrix is of type `CV_16S` then use `Mat.at(y,x)`. + - If matrix is of type `CV_32S` then use `Mat.at(y,x)`. + - If matrix is of type `CV_32F` then use `Mat.at(y,x)`. + - If matrix is of type `CV_64F` then use `Mat.at(y,x)`. + + @param i0 Index along the dimension 0 + */ + template _Tp& at(int i0=0); + /** @overload + @param i0 Index along the dimension 0 + */ + template const _Tp& at(int i0=0) const; + /** @overload + @param row Index along the dimension 0 + @param col Index along the dimension 1 + */ + template _Tp& at(int row, int col); + /** @overload + @param row Index along the dimension 0 + @param col Index along the dimension 1 + */ + template const _Tp& at(int row, int col) const; + + /** @overload + @param i0 Index along the dimension 0 + @param i1 Index along the dimension 1 + @param i2 Index along the dimension 2 + */ + template _Tp& at(int i0, int i1, int i2); + /** @overload + @param i0 Index along the dimension 0 + @param i1 Index along the dimension 1 + @param i2 Index along the dimension 2 + */ + template const _Tp& at(int i0, int i1, int i2) const; + + /** @overload + @param idx Array of Mat::dims indices. + */ + template _Tp& at(const int* idx); + /** @overload + @param idx Array of Mat::dims indices. + */ + template const _Tp& at(const int* idx) const; + + /** @overload */ + template _Tp& at(const Vec& idx); + /** @overload */ + template const _Tp& at(const Vec& idx) const; + + /** @overload + special versions for 2D arrays (especially convenient for referencing image pixels) + @param pt Element position specified as Point(j,i) . + */ + template _Tp& at(Point pt); + /** @overload + special versions for 2D arrays (especially convenient for referencing image pixels) + @param pt Element position specified as Point(j,i) . + */ + template const _Tp& at(Point pt) const; + + /** @brief Returns the matrix iterator and sets it to the first matrix element. + + The methods return the matrix read-only or read-write iterators. The use of matrix iterators is very + similar to the use of bi-directional STL iterators. In the example below, the alpha blending + function is rewritten using the matrix iterators: + @code + template + void alphaBlendRGBA(const Mat& src1, const Mat& src2, Mat& dst) + { + typedef Vec VT; + + const float alpha_scale = (float)std::numeric_limits::max(), + inv_scale = 1.f/alpha_scale; + + CV_Assert( src1.type() == src2.type() && + src1.type() == traits::Type::value && + src1.size() == src2.size()); + Size size = src1.size(); + dst.create(size, src1.type()); + + MatConstIterator_ it1 = src1.begin(), it1_end = src1.end(); + MatConstIterator_ it2 = src2.begin(); + MatIterator_ dst_it = dst.begin(); + + for( ; it1 != it1_end; ++it1, ++it2, ++dst_it ) + { + VT pix1 = *it1, pix2 = *it2; + float alpha = pix1[3]*inv_scale, beta = pix2[3]*inv_scale; + *dst_it = VT(saturate_cast(pix1[0]*alpha + pix2[0]*beta), + saturate_cast(pix1[1]*alpha + pix2[1]*beta), + saturate_cast(pix1[2]*alpha + pix2[2]*beta), + saturate_cast((1 - (1-alpha)*(1-beta))*alpha_scale)); + } + } + @endcode + */ + template MatIterator_<_Tp> begin(); + template MatConstIterator_<_Tp> begin() const; + + /** @brief Returns the matrix iterator and sets it to the after-last matrix element. + + The methods return the matrix read-only or read-write iterators, set to the point following the last + matrix element. + */ + template MatIterator_<_Tp> end(); + template MatConstIterator_<_Tp> end() const; + + /** @brief Runs the given functor over all matrix elements in parallel. + + The operation passed as argument has to be a function pointer, a function object or a lambda(C++11). + + Example 1. All of the operations below put 0xFF the first channel of all matrix elements: + @code + Mat image(1920, 1080, CV_8UC3); + typedef cv::Point3_ Pixel; + + // first. raw pointer access. + for (int r = 0; r < image.rows; ++r) { + Pixel* ptr = image.ptr(r, 0); + const Pixel* ptr_end = ptr + image.cols; + for (; ptr != ptr_end; ++ptr) { + ptr->x = 255; + } + } + + // Using MatIterator. (Simple but there are a Iterator's overhead) + for (Pixel &p : cv::Mat_(image)) { + p.x = 255; + } + + // Parallel execution with function object. + struct Operator { + void operator ()(Pixel &pixel, const int * position) { + pixel.x = 255; + } + }; + image.forEach(Operator()); + + // Parallel execution using C++11 lambda. + image.forEach([](Pixel &p, const int * position) -> void { + p.x = 255; + }); + @endcode + Example 2. Using the pixel's position: + @code + // Creating 3D matrix (255 x 255 x 255) typed uint8_t + // and initialize all elements by the value which equals elements position. + // i.e. pixels (x,y,z) = (1,2,3) is (b,g,r) = (1,2,3). + + int sizes[] = { 255, 255, 255 }; + typedef cv::Point3_ Pixel; + + Mat_ image = Mat::zeros(3, sizes, CV_8UC3); + + image.forEach([&](Pixel& pixel, const int position[]) -> void { + pixel.x = position[0]; + pixel.y = position[1]; + pixel.z = position[2]; + }); + @endcode + */ + template void forEach(const Functor& operation); + /** @overload */ + template void forEach(const Functor& operation) const; + + Mat(Mat&& m); + Mat& operator = (Mat&& m); + + enum { MAGIC_VAL = 0x42FF0000, AUTO_STEP = 0, CONTINUOUS_FLAG = CV_MAT_CONT_FLAG, SUBMATRIX_FLAG = CV_SUBMAT_FLAG }; + enum { MAGIC_MASK = 0xFFFF0000, TYPE_MASK = 0x00000FFF, DEPTH_MASK = 7 }; + + /*! includes several bit-fields: + - the magic signature + - continuity flag + - depth + - number of channels + */ + int flags; + //! the matrix dimensionality, >= 2 + int dims; + //! the number of rows and columns or (-1, -1) when the matrix has more than 2 dimensions + int rows, cols; + //! pointer to the data + uchar* data; + + //! helper fields used in locateROI and adjustROI + const uchar* datastart; + const uchar* dataend; + const uchar* datalimit; + + //! custom allocator + MatAllocator* allocator; + //! and the standard allocator + static MatAllocator* getStdAllocator(); + static MatAllocator* getDefaultAllocator(); + static void setDefaultAllocator(MatAllocator* allocator); + + //! internal use method: updates the continuity flag + void updateContinuityFlag(); + + //! interaction with UMat + UMatData* u; + + MatSize size; + MatStep step; + +protected: + template void forEach_impl(const Functor& operation); +}; + + +///////////////////////////////// Mat_<_Tp> //////////////////////////////////// + +/** @brief Template matrix class derived from Mat + +@code{.cpp} + template class Mat_ : public Mat + { + public: + // ... some specific methods + // and + // no new extra fields + }; +@endcode +The class `Mat_<_Tp>` is a *thin* template wrapper on top of the Mat class. It does not have any +extra data fields. Nor this class nor Mat has any virtual methods. Thus, references or pointers to +these two classes can be freely but carefully converted one to another. For example: +@code{.cpp} + // create a 100x100 8-bit matrix + Mat M(100,100,CV_8U); + // this will be compiled fine. no any data conversion will be done. + Mat_& M1 = (Mat_&)M; + // the program is likely to crash at the statement below + M1(99,99) = 1.f; +@endcode +While Mat is sufficient in most cases, Mat_ can be more convenient if you use a lot of element +access operations and if you know matrix type at the compilation time. Note that +`Mat::at(int y,int x)` and `Mat_::operator()(int y,int x)` do absolutely the same +and run at the same speed, but the latter is certainly shorter: +@code{.cpp} + Mat_ M(20,20); + for(int i = 0; i < M.rows; i++) + for(int j = 0; j < M.cols; j++) + M(i,j) = 1./(i+j+1); + Mat E, V; + eigen(M,E,V); + cout << E.at(0,0)/E.at(M.rows-1,0); +@endcode +To use Mat_ for multi-channel images/matrices, pass Vec as a Mat_ parameter: +@code{.cpp} + // allocate a 320x240 color image and fill it with green (in RGB space) + Mat_ img(240, 320, Vec3b(0,255,0)); + // now draw a diagonal white line + for(int i = 0; i < 100; i++) + img(i,i)=Vec3b(255,255,255); + // and now scramble the 2nd (red) channel of each pixel + for(int i = 0; i < img.rows; i++) + for(int j = 0; j < img.cols; j++) + img(i,j)[2] ^= (uchar)(i ^ j); +@endcode +Mat_ is fully compatible with C++11 range-based for loop. For example such loop +can be used to safely apply look-up table: +@code{.cpp} +void applyTable(Mat_& I, const uchar* const table) +{ + for(auto& pixel : I) + { + pixel = table[pixel]; + } +} +@endcode + */ +template class Mat_ : public Mat +{ +public: + typedef _Tp value_type; + typedef typename DataType<_Tp>::channel_type channel_type; + typedef MatIterator_<_Tp> iterator; + typedef MatConstIterator_<_Tp> const_iterator; + + //! default constructor + Mat_(); + //! equivalent to Mat(_rows, _cols, DataType<_Tp>::type) + Mat_(int _rows, int _cols); + //! constructor that sets each matrix element to specified value + Mat_(int _rows, int _cols, const _Tp& value); + //! equivalent to Mat(_size, DataType<_Tp>::type) + explicit Mat_(Size _size); + //! constructor that sets each matrix element to specified value + Mat_(Size _size, const _Tp& value); + //! n-dim array constructor + Mat_(int _ndims, const int* _sizes); + //! n-dim array constructor that sets each matrix element to specified value + Mat_(int _ndims, const int* _sizes, const _Tp& value); + //! copy/conversion constructor. If m is of different type, it's converted + Mat_(const Mat& m); + //! copy constructor + Mat_(const Mat_& m); + //! constructs a matrix on top of user-allocated data. step is in bytes(!!!), regardless of the type + Mat_(int _rows, int _cols, _Tp* _data, size_t _step=AUTO_STEP); + //! constructs n-dim matrix on top of user-allocated data. steps are in bytes(!!!), regardless of the type + Mat_(int _ndims, const int* _sizes, _Tp* _data, const size_t* _steps=0); + //! selects a submatrix + Mat_(const Mat_& m, const Range& rowRange, const Range& colRange=Range::all()); + //! selects a submatrix + Mat_(const Mat_& m, const Rect& roi); + //! selects a submatrix, n-dim version + Mat_(const Mat_& m, const Range* ranges); + //! selects a submatrix, n-dim version + Mat_(const Mat_& m, const std::vector& ranges); + //! from a matrix expression + explicit Mat_(const MatExpr& e); + //! makes a matrix out of Vec, std::vector, Point_ or Point3_. The matrix will have a single column + explicit Mat_(const std::vector<_Tp>& vec, bool copyData=false); + template explicit Mat_(const Vec::channel_type, n>& vec, bool copyData=true); + template explicit Mat_(const Matx::channel_type, m, n>& mtx, bool copyData=true); + explicit Mat_(const Point_::channel_type>& pt, bool copyData=true); + explicit Mat_(const Point3_::channel_type>& pt, bool copyData=true); + explicit Mat_(const MatCommaInitializer_<_Tp>& commaInitializer); + + Mat_(std::initializer_list<_Tp> values); + explicit Mat_(const std::initializer_list sizes, const std::initializer_list<_Tp> values); + + template explicit Mat_(const std::array<_Tp, _Nm>& arr, bool copyData=false); + + Mat_& operator = (const Mat& m); + Mat_& operator = (const Mat_& m); + //! set all the elements to s. + Mat_& operator = (const _Tp& s); + //! assign a matrix expression + Mat_& operator = (const MatExpr& e); + + //! iterators; they are smart enough to skip gaps in the end of rows + iterator begin(); + iterator end(); + const_iterator begin() const; + const_iterator end() const; + + //! template methods for for operation over all matrix elements. + // the operations take care of skipping gaps in the end of rows (if any) + template void forEach(const Functor& operation); + template void forEach(const Functor& operation) const; + + //! equivalent to Mat::create(_rows, _cols, DataType<_Tp>::type) + void create(int _rows, int _cols); + //! equivalent to Mat::create(_size, DataType<_Tp>::type) + void create(Size _size); + //! equivalent to Mat::create(_ndims, _sizes, DatType<_Tp>::type) + void create(int _ndims, const int* _sizes); + //! equivalent to Mat::release() + void release(); + //! cross-product + Mat_ cross(const Mat_& m) const; + //! data type conversion + template operator Mat_() const; + //! overridden forms of Mat::row() etc. + Mat_ row(int y) const; + Mat_ col(int x) const; + Mat_ diag(int d=0) const; + Mat_ clone() const CV_NODISCARD; + + //! overridden forms of Mat::elemSize() etc. + size_t elemSize() const; + size_t elemSize1() const; + int type() const; + int depth() const; + int channels() const; + size_t step1(int i=0) const; + //! returns step()/sizeof(_Tp) + size_t stepT(int i=0) const; + + //! overridden forms of Mat::zeros() etc. Data type is omitted, of course + static MatExpr zeros(int rows, int cols); + static MatExpr zeros(Size size); + static MatExpr zeros(int _ndims, const int* _sizes); + static MatExpr ones(int rows, int cols); + static MatExpr ones(Size size); + static MatExpr ones(int _ndims, const int* _sizes); + static MatExpr eye(int rows, int cols); + static MatExpr eye(Size size); + + //! some more overridden methods + Mat_& adjustROI( int dtop, int dbottom, int dleft, int dright ); + Mat_ operator()( const Range& rowRange, const Range& colRange ) const; + Mat_ operator()( const Rect& roi ) const; + Mat_ operator()( const Range* ranges ) const; + Mat_ operator()(const std::vector& ranges) const; + + //! more convenient forms of row and element access operators + _Tp* operator [](int y); + const _Tp* operator [](int y) const; + + //! returns reference to the specified element + _Tp& operator ()(const int* idx); + //! returns read-only reference to the specified element + const _Tp& operator ()(const int* idx) const; + + //! returns reference to the specified element + template _Tp& operator ()(const Vec& idx); + //! returns read-only reference to the specified element + template const _Tp& operator ()(const Vec& idx) const; + + //! returns reference to the specified element (1D case) + _Tp& operator ()(int idx0); + //! returns read-only reference to the specified element (1D case) + const _Tp& operator ()(int idx0) const; + //! returns reference to the specified element (2D case) + _Tp& operator ()(int row, int col); + //! returns read-only reference to the specified element (2D case) + const _Tp& operator ()(int row, int col) const; + //! returns reference to the specified element (3D case) + _Tp& operator ()(int idx0, int idx1, int idx2); + //! returns read-only reference to the specified element (3D case) + const _Tp& operator ()(int idx0, int idx1, int idx2) const; + + _Tp& operator ()(Point pt); + const _Tp& operator ()(Point pt) const; + + //! conversion to vector. + operator std::vector<_Tp>() const; + + //! conversion to array. + template operator std::array<_Tp, _Nm>() const; + + //! conversion to Vec + template operator Vec::channel_type, n>() const; + //! conversion to Matx + template operator Matx::channel_type, m, n>() const; + + Mat_(Mat_&& m); + Mat_& operator = (Mat_&& m); + + Mat_(Mat&& m); + Mat_& operator = (Mat&& m); + + Mat_(MatExpr&& e); +}; + +typedef Mat_ Mat1b; +typedef Mat_ Mat2b; +typedef Mat_ Mat3b; +typedef Mat_ Mat4b; + +typedef Mat_ Mat1s; +typedef Mat_ Mat2s; +typedef Mat_ Mat3s; +typedef Mat_ Mat4s; + +typedef Mat_ Mat1w; +typedef Mat_ Mat2w; +typedef Mat_ Mat3w; +typedef Mat_ Mat4w; + +typedef Mat_ Mat1i; +typedef Mat_ Mat2i; +typedef Mat_ Mat3i; +typedef Mat_ Mat4i; + +typedef Mat_ Mat1f; +typedef Mat_ Mat2f; +typedef Mat_ Mat3f; +typedef Mat_ Mat4f; + +typedef Mat_ Mat1d; +typedef Mat_ Mat2d; +typedef Mat_ Mat3d; +typedef Mat_ Mat4d; + +/** @todo document */ +class CV_EXPORTS UMat +{ +public: + //! default constructor + UMat(UMatUsageFlags usageFlags = USAGE_DEFAULT); + //! constructs 2D matrix of the specified size and type + // (_type is CV_8UC1, CV_64FC3, CV_32SC(12) etc.) + UMat(int rows, int cols, int type, UMatUsageFlags usageFlags = USAGE_DEFAULT); + UMat(Size size, int type, UMatUsageFlags usageFlags = USAGE_DEFAULT); + //! constructs 2D matrix and fills it with the specified value _s. + UMat(int rows, int cols, int type, const Scalar& s, UMatUsageFlags usageFlags = USAGE_DEFAULT); + UMat(Size size, int type, const Scalar& s, UMatUsageFlags usageFlags = USAGE_DEFAULT); + + //! constructs n-dimensional matrix + UMat(int ndims, const int* sizes, int type, UMatUsageFlags usageFlags = USAGE_DEFAULT); + UMat(int ndims, const int* sizes, int type, const Scalar& s, UMatUsageFlags usageFlags = USAGE_DEFAULT); + + //! copy constructor + UMat(const UMat& m); + + //! creates a matrix header for a part of the bigger matrix + UMat(const UMat& m, const Range& rowRange, const Range& colRange=Range::all()); + UMat(const UMat& m, const Rect& roi); + UMat(const UMat& m, const Range* ranges); + UMat(const UMat& m, const std::vector& ranges); + + // FIXIT copyData=false is not implemented, drop this in favor of cv::Mat (OpenCV 5.0) + //! builds matrix from std::vector with or without copying the data + template explicit UMat(const std::vector<_Tp>& vec, bool copyData=false); + + //! destructor - calls release() + ~UMat(); + //! assignment operators + UMat& operator = (const UMat& m); + + Mat getMat(AccessFlag flags) const; + + //! returns a new matrix header for the specified row + UMat row(int y) const; + //! returns a new matrix header for the specified column + UMat col(int x) const; + //! ... for the specified row span + UMat rowRange(int startrow, int endrow) const; + UMat rowRange(const Range& r) const; + //! ... for the specified column span + UMat colRange(int startcol, int endcol) const; + UMat colRange(const Range& r) const; + //! ... for the specified diagonal + //! (d=0 - the main diagonal, + //! >0 - a diagonal from the upper half, + //! <0 - a diagonal from the lower half) + UMat diag(int d=0) const; + //! constructs a square diagonal matrix which main diagonal is vector "d" + static UMat diag(const UMat& d); + + //! returns deep copy of the matrix, i.e. the data is copied + UMat clone() const CV_NODISCARD; + //! copies the matrix content to "m". + // It calls m.create(this->size(), this->type()). + void copyTo( OutputArray m ) const; + //! copies those matrix elements to "m" that are marked with non-zero mask elements. + void copyTo( OutputArray m, InputArray mask ) const; + //! converts matrix to another datatype with optional scaling. See cvConvertScale. + void convertTo( OutputArray m, int rtype, double alpha=1, double beta=0 ) const; + + void assignTo( UMat& m, int type=-1 ) const; + + //! sets every matrix element to s + UMat& operator = (const Scalar& s); + //! sets some of the matrix elements to s, according to the mask + UMat& setTo(InputArray value, InputArray mask=noArray()); + //! creates alternative matrix header for the same data, with different + // number of channels and/or different number of rows. see cvReshape. + UMat reshape(int cn, int rows=0) const; + UMat reshape(int cn, int newndims, const int* newsz) const; + + //! matrix transposition by means of matrix expressions + UMat t() const; + //! matrix inversion by means of matrix expressions + UMat inv(int method=DECOMP_LU) const; + //! per-element matrix multiplication by means of matrix expressions + UMat mul(InputArray m, double scale=1) const; + + //! computes dot-product + double dot(InputArray m) const; + + //! Matlab-style matrix initialization + static UMat zeros(int rows, int cols, int type); + static UMat zeros(Size size, int type); + static UMat zeros(int ndims, const int* sz, int type); + static UMat ones(int rows, int cols, int type); + static UMat ones(Size size, int type); + static UMat ones(int ndims, const int* sz, int type); + static UMat eye(int rows, int cols, int type); + static UMat eye(Size size, int type); + + //! allocates new matrix data unless the matrix already has specified size and type. + // previous data is unreferenced if needed. + void create(int rows, int cols, int type, UMatUsageFlags usageFlags = USAGE_DEFAULT); + void create(Size size, int type, UMatUsageFlags usageFlags = USAGE_DEFAULT); + void create(int ndims, const int* sizes, int type, UMatUsageFlags usageFlags = USAGE_DEFAULT); + void create(const std::vector& sizes, int type, UMatUsageFlags usageFlags = USAGE_DEFAULT); + + //! increases the reference counter; use with care to avoid memleaks + void addref(); + //! decreases reference counter; + // deallocates the data when reference counter reaches 0. + void release(); + + //! deallocates the matrix data + void deallocate(); + //! internal use function; properly re-allocates _size, _step arrays + void copySize(const UMat& m); + + //! locates matrix header within a parent matrix. See below + void locateROI( Size& wholeSize, Point& ofs ) const; + //! moves/resizes the current matrix ROI inside the parent matrix. + UMat& adjustROI( int dtop, int dbottom, int dleft, int dright ); + //! extracts a rectangular sub-matrix + // (this is a generalized form of row, rowRange etc.) + UMat operator()( Range rowRange, Range colRange ) const; + UMat operator()( const Rect& roi ) const; + UMat operator()( const Range* ranges ) const; + UMat operator()(const std::vector& ranges) const; + + //! returns true iff the matrix data is continuous + // (i.e. when there are no gaps between successive rows). + // similar to CV_IS_MAT_CONT(cvmat->type) + bool isContinuous() const; + + //! returns true if the matrix is a submatrix of another matrix + bool isSubmatrix() const; + + //! returns element size in bytes, + // similar to CV_ELEM_SIZE(cvmat->type) + size_t elemSize() const; + //! returns the size of element channel in bytes. + size_t elemSize1() const; + //! returns element type, similar to CV_MAT_TYPE(cvmat->type) + int type() const; + //! returns element type, similar to CV_MAT_DEPTH(cvmat->type) + int depth() const; + //! returns element type, similar to CV_MAT_CN(cvmat->type) + int channels() const; + //! returns step/elemSize1() + size_t step1(int i=0) const; + //! returns true if matrix data is NULL + bool empty() const; + //! returns the total number of matrix elements + size_t total() const; + + //! returns N if the matrix is 1-channel (N x ptdim) or ptdim-channel (1 x N) or (N x 1); negative number otherwise + int checkVector(int elemChannels, int depth=-1, bool requireContinuous=true) const; + + UMat(UMat&& m); + UMat& operator = (UMat&& m); + + /*! Returns the OpenCL buffer handle on which UMat operates on. + The UMat instance should be kept alive during the use of the handle to prevent the buffer to be + returned to the OpenCV buffer pool. + */ + void* handle(AccessFlag accessFlags) const; + void ndoffset(size_t* ofs) const; + + enum { MAGIC_VAL = 0x42FF0000, AUTO_STEP = 0, CONTINUOUS_FLAG = CV_MAT_CONT_FLAG, SUBMATRIX_FLAG = CV_SUBMAT_FLAG }; + enum { MAGIC_MASK = 0xFFFF0000, TYPE_MASK = 0x00000FFF, DEPTH_MASK = 7 }; + + /*! includes several bit-fields: + - the magic signature + - continuity flag + - depth + - number of channels + */ + int flags; + //! the matrix dimensionality, >= 2 + int dims; + //! the number of rows and columns or (-1, -1) when the matrix has more than 2 dimensions + int rows, cols; + + //! custom allocator + MatAllocator* allocator; + UMatUsageFlags usageFlags; // usage flags for allocator + //! and the standard allocator + static MatAllocator* getStdAllocator(); + + //! internal use method: updates the continuity flag + void updateContinuityFlag(); + + // black-box container of UMat data + UMatData* u; + + // offset of the submatrix (or 0) + size_t offset; + + MatSize size; + MatStep step; + +protected: +}; + + +/////////////////////////// multi-dimensional sparse matrix ////////////////////////// + +/** @brief The class SparseMat represents multi-dimensional sparse numerical arrays. + +Such a sparse array can store elements of any type that Mat can store. *Sparse* means that only +non-zero elements are stored (though, as a result of operations on a sparse matrix, some of its +stored elements can actually become 0. It is up to you to detect such elements and delete them +using SparseMat::erase ). The non-zero elements are stored in a hash table that grows when it is +filled so that the search time is O(1) in average (regardless of whether element is there or not). +Elements can be accessed using the following methods: +- Query operations (SparseMat::ptr and the higher-level SparseMat::ref, SparseMat::value and + SparseMat::find), for example: + @code + const int dims = 5; + int size[5] = {10, 10, 10, 10, 10}; + SparseMat sparse_mat(dims, size, CV_32F); + for(int i = 0; i < 1000; i++) + { + int idx[dims]; + for(int k = 0; k < dims; k++) + idx[k] = rand() % size[k]; + sparse_mat.ref(idx) += 1.f; + } + cout << "nnz = " << sparse_mat.nzcount() << endl; + @endcode +- Sparse matrix iterators. They are similar to MatIterator but different from NAryMatIterator. + That is, the iteration loop is familiar to STL users: + @code + // prints elements of a sparse floating-point matrix + // and the sum of elements. + SparseMatConstIterator_ + it = sparse_mat.begin(), + it_end = sparse_mat.end(); + double s = 0; + int dims = sparse_mat.dims(); + for(; it != it_end; ++it) + { + // print element indices and the element value + const SparseMat::Node* n = it.node(); + printf("("); + for(int i = 0; i < dims; i++) + printf("%d%s", n->idx[i], i < dims-1 ? ", " : ")"); + printf(": %g\n", it.value()); + s += *it; + } + printf("Element sum is %g\n", s); + @endcode + If you run this loop, you will notice that elements are not enumerated in a logical order + (lexicographical, and so on). They come in the same order as they are stored in the hash table + (semi-randomly). You may collect pointers to the nodes and sort them to get the proper ordering. + Note, however, that pointers to the nodes may become invalid when you add more elements to the + matrix. This may happen due to possible buffer reallocation. +- Combination of the above 2 methods when you need to process 2 or more sparse matrices + simultaneously. For example, this is how you can compute unnormalized cross-correlation of the 2 + floating-point sparse matrices: + @code + double cross_corr(const SparseMat& a, const SparseMat& b) + { + const SparseMat *_a = &a, *_b = &b; + // if b contains less elements than a, + // it is faster to iterate through b + if(_a->nzcount() > _b->nzcount()) + std::swap(_a, _b); + SparseMatConstIterator_ it = _a->begin(), + it_end = _a->end(); + double ccorr = 0; + for(; it != it_end; ++it) + { + // take the next element from the first matrix + float avalue = *it; + const Node* anode = it.node(); + // and try to find an element with the same index in the second matrix. + // since the hash value depends only on the element index, + // reuse the hash value stored in the node + float bvalue = _b->value(anode->idx,&anode->hashval); + ccorr += avalue*bvalue; + } + return ccorr; + } + @endcode + */ +class CV_EXPORTS SparseMat +{ +public: + typedef SparseMatIterator iterator; + typedef SparseMatConstIterator const_iterator; + + enum { MAGIC_VAL=0x42FD0000, MAX_DIM=32, HASH_SCALE=0x5bd1e995, HASH_BIT=0x80000000 }; + + //! the sparse matrix header + struct CV_EXPORTS Hdr + { + Hdr(int _dims, const int* _sizes, int _type); + void clear(); + int refcount; + int dims; + int valueOffset; + size_t nodeSize; + size_t nodeCount; + size_t freeList; + std::vector pool; + std::vector hashtab; + int size[MAX_DIM]; + }; + + //! sparse matrix node - element of a hash table + struct CV_EXPORTS Node + { + //! hash value + size_t hashval; + //! index of the next node in the same hash table entry + size_t next; + //! index of the matrix element + int idx[MAX_DIM]; + }; + + /** @brief Various SparseMat constructors. + */ + SparseMat(); + + /** @overload + @param dims Array dimensionality. + @param _sizes Sparce matrix size on all dementions. + @param _type Sparse matrix data type. + */ + SparseMat(int dims, const int* _sizes, int _type); + + /** @overload + @param m Source matrix for copy constructor. If m is dense matrix (ocvMat) then it will be converted + to sparse representation. + */ + SparseMat(const SparseMat& m); + + /** @overload + @param m Source matrix for copy constructor. If m is dense matrix (ocvMat) then it will be converted + to sparse representation. + */ + explicit SparseMat(const Mat& m); + + //! the destructor + ~SparseMat(); + + //! assignment operator. This is O(1) operation, i.e. no data is copied + SparseMat& operator = (const SparseMat& m); + //! equivalent to the corresponding constructor + SparseMat& operator = (const Mat& m); + + //! creates full copy of the matrix + SparseMat clone() const CV_NODISCARD; + + //! copies all the data to the destination matrix. All the previous content of m is erased + void copyTo( SparseMat& m ) const; + //! converts sparse matrix to dense matrix. + void copyTo( Mat& m ) const; + //! multiplies all the matrix elements by the specified scale factor alpha and converts the results to the specified data type + void convertTo( SparseMat& m, int rtype, double alpha=1 ) const; + //! converts sparse matrix to dense n-dim matrix with optional type conversion and scaling. + /*! + @param [out] m - output matrix; if it does not have a proper size or type before the operation, + it is reallocated + @param [in] rtype - desired output matrix type or, rather, the depth since the number of channels + are the same as the input has; if rtype is negative, the output matrix will have the + same type as the input. + @param [in] alpha - optional scale factor + @param [in] beta - optional delta added to the scaled values + */ + void convertTo( Mat& m, int rtype, double alpha=1, double beta=0 ) const; + + // not used now + void assignTo( SparseMat& m, int type=-1 ) const; + + //! reallocates sparse matrix. + /*! + If the matrix already had the proper size and type, + it is simply cleared with clear(), otherwise, + the old matrix is released (using release()) and the new one is allocated. + */ + void create(int dims, const int* _sizes, int _type); + //! sets all the sparse matrix elements to 0, which means clearing the hash table. + void clear(); + //! manually increments the reference counter to the header. + void addref(); + // decrements the header reference counter. When the counter reaches 0, the header and all the underlying data are deallocated. + void release(); + + //! converts sparse matrix to the old-style representation; all the elements are copied. + //operator CvSparseMat*() const; + //! returns the size of each element in bytes (not including the overhead - the space occupied by SparseMat::Node elements) + size_t elemSize() const; + //! returns elemSize()/channels() + size_t elemSize1() const; + + //! returns type of sparse matrix elements + int type() const; + //! returns the depth of sparse matrix elements + int depth() const; + //! returns the number of channels + int channels() const; + + //! returns the array of sizes, or NULL if the matrix is not allocated + const int* size() const; + //! returns the size of i-th matrix dimension (or 0) + int size(int i) const; + //! returns the matrix dimensionality + int dims() const; + //! returns the number of non-zero elements (=the number of hash table nodes) + size_t nzcount() const; + + //! computes the element hash value (1D case) + size_t hash(int i0) const; + //! computes the element hash value (2D case) + size_t hash(int i0, int i1) const; + //! computes the element hash value (3D case) + size_t hash(int i0, int i1, int i2) const; + //! computes the element hash value (nD case) + size_t hash(const int* idx) const; + + //!@{ + /*! + specialized variants for 1D, 2D, 3D cases and the generic_type one for n-D case. + return pointer to the matrix element. + - if the element is there (it's non-zero), the pointer to it is returned + - if it's not there and createMissing=false, NULL pointer is returned + - if it's not there and createMissing=true, then the new element + is created and initialized with 0. Pointer to it is returned + - if the optional hashval pointer is not NULL, the element hash value is + not computed, but *hashval is taken instead. + */ + //! returns pointer to the specified element (1D case) + uchar* ptr(int i0, bool createMissing, size_t* hashval=0); + //! returns pointer to the specified element (2D case) + uchar* ptr(int i0, int i1, bool createMissing, size_t* hashval=0); + //! returns pointer to the specified element (3D case) + uchar* ptr(int i0, int i1, int i2, bool createMissing, size_t* hashval=0); + //! returns pointer to the specified element (nD case) + uchar* ptr(const int* idx, bool createMissing, size_t* hashval=0); + //!@} + + //!@{ + /*! + return read-write reference to the specified sparse matrix element. + + `ref<_Tp>(i0,...[,hashval])` is equivalent to `*(_Tp*)ptr(i0,...,true[,hashval])`. + The methods always return a valid reference. + If the element did not exist, it is created and initialized with 0. + */ + //! returns reference to the specified element (1D case) + template _Tp& ref(int i0, size_t* hashval=0); + //! returns reference to the specified element (2D case) + template _Tp& ref(int i0, int i1, size_t* hashval=0); + //! returns reference to the specified element (3D case) + template _Tp& ref(int i0, int i1, int i2, size_t* hashval=0); + //! returns reference to the specified element (nD case) + template _Tp& ref(const int* idx, size_t* hashval=0); + //!@} + + //!@{ + /*! + return value of the specified sparse matrix element. + + `value<_Tp>(i0,...[,hashval])` is equivalent to + @code + { const _Tp* p = find<_Tp>(i0,...[,hashval]); return p ? *p : _Tp(); } + @endcode + + That is, if the element did not exist, the methods return 0. + */ + //! returns value of the specified element (1D case) + template _Tp value(int i0, size_t* hashval=0) const; + //! returns value of the specified element (2D case) + template _Tp value(int i0, int i1, size_t* hashval=0) const; + //! returns value of the specified element (3D case) + template _Tp value(int i0, int i1, int i2, size_t* hashval=0) const; + //! returns value of the specified element (nD case) + template _Tp value(const int* idx, size_t* hashval=0) const; + //!@} + + //!@{ + /*! + Return pointer to the specified sparse matrix element if it exists + + `find<_Tp>(i0,...[,hashval])` is equivalent to `(_const Tp*)ptr(i0,...false[,hashval])`. + + If the specified element does not exist, the methods return NULL. + */ + //! returns pointer to the specified element (1D case) + template const _Tp* find(int i0, size_t* hashval=0) const; + //! returns pointer to the specified element (2D case) + template const _Tp* find(int i0, int i1, size_t* hashval=0) const; + //! returns pointer to the specified element (3D case) + template const _Tp* find(int i0, int i1, int i2, size_t* hashval=0) const; + //! returns pointer to the specified element (nD case) + template const _Tp* find(const int* idx, size_t* hashval=0) const; + //!@} + + //! erases the specified element (2D case) + void erase(int i0, int i1, size_t* hashval=0); + //! erases the specified element (3D case) + void erase(int i0, int i1, int i2, size_t* hashval=0); + //! erases the specified element (nD case) + void erase(const int* idx, size_t* hashval=0); + + //!@{ + /*! + return the sparse matrix iterator pointing to the first sparse matrix element + */ + //! returns the sparse matrix iterator at the matrix beginning + SparseMatIterator begin(); + //! returns the sparse matrix iterator at the matrix beginning + template SparseMatIterator_<_Tp> begin(); + //! returns the read-only sparse matrix iterator at the matrix beginning + SparseMatConstIterator begin() const; + //! returns the read-only sparse matrix iterator at the matrix beginning + template SparseMatConstIterator_<_Tp> begin() const; + //!@} + /*! + return the sparse matrix iterator pointing to the element following the last sparse matrix element + */ + //! returns the sparse matrix iterator at the matrix end + SparseMatIterator end(); + //! returns the read-only sparse matrix iterator at the matrix end + SparseMatConstIterator end() const; + //! returns the typed sparse matrix iterator at the matrix end + template SparseMatIterator_<_Tp> end(); + //! returns the typed read-only sparse matrix iterator at the matrix end + template SparseMatConstIterator_<_Tp> end() const; + + //! returns the value stored in the sparse martix node + template _Tp& value(Node* n); + //! returns the value stored in the sparse martix node + template const _Tp& value(const Node* n) const; + + ////////////// some internal-use methods /////////////// + Node* node(size_t nidx); + const Node* node(size_t nidx) const; + + uchar* newNode(const int* idx, size_t hashval); + void removeNode(size_t hidx, size_t nidx, size_t previdx); + void resizeHashTab(size_t newsize); + + int flags; + Hdr* hdr; +}; + + + +///////////////////////////////// SparseMat_<_Tp> //////////////////////////////////// + +/** @brief Template sparse n-dimensional array class derived from SparseMat + +SparseMat_ is a thin wrapper on top of SparseMat created in the same way as Mat_ . It simplifies +notation of some operations: +@code + int sz[] = {10, 20, 30}; + SparseMat_ M(3, sz); + ... + M.ref(1, 2, 3) = M(4, 5, 6) + M(7, 8, 9); +@endcode + */ +template class SparseMat_ : public SparseMat +{ +public: + typedef SparseMatIterator_<_Tp> iterator; + typedef SparseMatConstIterator_<_Tp> const_iterator; + + //! the default constructor + SparseMat_(); + //! the full constructor equivalent to SparseMat(dims, _sizes, DataType<_Tp>::type) + SparseMat_(int dims, const int* _sizes); + //! the copy constructor. If DataType<_Tp>.type != m.type(), the m elements are converted + SparseMat_(const SparseMat& m); + //! the copy constructor. This is O(1) operation - no data is copied + SparseMat_(const SparseMat_& m); + //! converts dense matrix to the sparse form + SparseMat_(const Mat& m); + //! converts the old-style sparse matrix to the C++ class. All the elements are copied + //SparseMat_(const CvSparseMat* m); + //! the assignment operator. If DataType<_Tp>.type != m.type(), the m elements are converted + SparseMat_& operator = (const SparseMat& m); + //! the assignment operator. This is O(1) operation - no data is copied + SparseMat_& operator = (const SparseMat_& m); + //! converts dense matrix to the sparse form + SparseMat_& operator = (const Mat& m); + + //! makes full copy of the matrix. All the elements are duplicated + SparseMat_ clone() const CV_NODISCARD; + //! equivalent to cv::SparseMat::create(dims, _sizes, DataType<_Tp>::type) + void create(int dims, const int* _sizes); + //! converts sparse matrix to the old-style CvSparseMat. All the elements are copied + //operator CvSparseMat*() const; + + //! returns type of the matrix elements + int type() const; + //! returns depth of the matrix elements + int depth() const; + //! returns the number of channels in each matrix element + int channels() const; + + //! equivalent to SparseMat::ref<_Tp>(i0, hashval) + _Tp& ref(int i0, size_t* hashval=0); + //! equivalent to SparseMat::ref<_Tp>(i0, i1, hashval) + _Tp& ref(int i0, int i1, size_t* hashval=0); + //! equivalent to SparseMat::ref<_Tp>(i0, i1, i2, hashval) + _Tp& ref(int i0, int i1, int i2, size_t* hashval=0); + //! equivalent to SparseMat::ref<_Tp>(idx, hashval) + _Tp& ref(const int* idx, size_t* hashval=0); + + //! equivalent to SparseMat::value<_Tp>(i0, hashval) + _Tp operator()(int i0, size_t* hashval=0) const; + //! equivalent to SparseMat::value<_Tp>(i0, i1, hashval) + _Tp operator()(int i0, int i1, size_t* hashval=0) const; + //! equivalent to SparseMat::value<_Tp>(i0, i1, i2, hashval) + _Tp operator()(int i0, int i1, int i2, size_t* hashval=0) const; + //! equivalent to SparseMat::value<_Tp>(idx, hashval) + _Tp operator()(const int* idx, size_t* hashval=0) const; + + //! returns sparse matrix iterator pointing to the first sparse matrix element + SparseMatIterator_<_Tp> begin(); + //! returns read-only sparse matrix iterator pointing to the first sparse matrix element + SparseMatConstIterator_<_Tp> begin() const; + //! returns sparse matrix iterator pointing to the element following the last sparse matrix element + SparseMatIterator_<_Tp> end(); + //! returns read-only sparse matrix iterator pointing to the element following the last sparse matrix element + SparseMatConstIterator_<_Tp> end() const; +}; + + + +////////////////////////////////// MatConstIterator ////////////////////////////////// + +class CV_EXPORTS MatConstIterator +{ +public: + typedef uchar* value_type; + typedef ptrdiff_t difference_type; + typedef const uchar** pointer; + typedef uchar* reference; + + typedef std::random_access_iterator_tag iterator_category; + + //! default constructor + MatConstIterator(); + //! constructor that sets the iterator to the beginning of the matrix + MatConstIterator(const Mat* _m); + //! constructor that sets the iterator to the specified element of the matrix + MatConstIterator(const Mat* _m, int _row, int _col=0); + //! constructor that sets the iterator to the specified element of the matrix + MatConstIterator(const Mat* _m, Point _pt); + //! constructor that sets the iterator to the specified element of the matrix + MatConstIterator(const Mat* _m, const int* _idx); + //! copy constructor + MatConstIterator(const MatConstIterator& it); + + //! copy operator + MatConstIterator& operator = (const MatConstIterator& it); + //! returns the current matrix element + const uchar* operator *() const; + //! returns the i-th matrix element, relative to the current + const uchar* operator [](ptrdiff_t i) const; + + //! shifts the iterator forward by the specified number of elements + MatConstIterator& operator += (ptrdiff_t ofs); + //! shifts the iterator backward by the specified number of elements + MatConstIterator& operator -= (ptrdiff_t ofs); + //! decrements the iterator + MatConstIterator& operator --(); + //! decrements the iterator + MatConstIterator operator --(int); + //! increments the iterator + MatConstIterator& operator ++(); + //! increments the iterator + MatConstIterator operator ++(int); + //! returns the current iterator position + Point pos() const; + //! returns the current iterator position + void pos(int* _idx) const; + + ptrdiff_t lpos() const; + void seek(ptrdiff_t ofs, bool relative = false); + void seek(const int* _idx, bool relative = false); + + const Mat* m; + size_t elemSize; + const uchar* ptr; + const uchar* sliceStart; + const uchar* sliceEnd; +}; + + + +////////////////////////////////// MatConstIterator_ ///////////////////////////////// + +/** @brief Matrix read-only iterator + */ +template +class MatConstIterator_ : public MatConstIterator +{ +public: + typedef _Tp value_type; + typedef ptrdiff_t difference_type; + typedef const _Tp* pointer; + typedef const _Tp& reference; + + typedef std::random_access_iterator_tag iterator_category; + + //! default constructor + MatConstIterator_(); + //! constructor that sets the iterator to the beginning of the matrix + MatConstIterator_(const Mat_<_Tp>* _m); + //! constructor that sets the iterator to the specified element of the matrix + MatConstIterator_(const Mat_<_Tp>* _m, int _row, int _col=0); + //! constructor that sets the iterator to the specified element of the matrix + MatConstIterator_(const Mat_<_Tp>* _m, Point _pt); + //! constructor that sets the iterator to the specified element of the matrix + MatConstIterator_(const Mat_<_Tp>* _m, const int* _idx); + //! copy constructor + MatConstIterator_(const MatConstIterator_& it); + + //! copy operator + MatConstIterator_& operator = (const MatConstIterator_& it); + //! returns the current matrix element + const _Tp& operator *() const; + //! returns the i-th matrix element, relative to the current + const _Tp& operator [](ptrdiff_t i) const; + + //! shifts the iterator forward by the specified number of elements + MatConstIterator_& operator += (ptrdiff_t ofs); + //! shifts the iterator backward by the specified number of elements + MatConstIterator_& operator -= (ptrdiff_t ofs); + //! decrements the iterator + MatConstIterator_& operator --(); + //! decrements the iterator + MatConstIterator_ operator --(int); + //! increments the iterator + MatConstIterator_& operator ++(); + //! increments the iterator + MatConstIterator_ operator ++(int); + //! returns the current iterator position + Point pos() const; +}; + + + +//////////////////////////////////// MatIterator_ //////////////////////////////////// + +/** @brief Matrix read-write iterator +*/ +template +class MatIterator_ : public MatConstIterator_<_Tp> +{ +public: + typedef _Tp* pointer; + typedef _Tp& reference; + + typedef std::random_access_iterator_tag iterator_category; + + //! the default constructor + MatIterator_(); + //! constructor that sets the iterator to the beginning of the matrix + MatIterator_(Mat_<_Tp>* _m); + //! constructor that sets the iterator to the specified element of the matrix + MatIterator_(Mat_<_Tp>* _m, int _row, int _col=0); + //! constructor that sets the iterator to the specified element of the matrix + MatIterator_(Mat_<_Tp>* _m, Point _pt); + //! constructor that sets the iterator to the specified element of the matrix + MatIterator_(Mat_<_Tp>* _m, const int* _idx); + //! copy constructor + MatIterator_(const MatIterator_& it); + //! copy operator + MatIterator_& operator = (const MatIterator_<_Tp>& it ); + + //! returns the current matrix element + _Tp& operator *() const; + //! returns the i-th matrix element, relative to the current + _Tp& operator [](ptrdiff_t i) const; + + //! shifts the iterator forward by the specified number of elements + MatIterator_& operator += (ptrdiff_t ofs); + //! shifts the iterator backward by the specified number of elements + MatIterator_& operator -= (ptrdiff_t ofs); + //! decrements the iterator + MatIterator_& operator --(); + //! decrements the iterator + MatIterator_ operator --(int); + //! increments the iterator + MatIterator_& operator ++(); + //! increments the iterator + MatIterator_ operator ++(int); +}; + + + +/////////////////////////////// SparseMatConstIterator /////////////////////////////// + +/** @brief Read-Only Sparse Matrix Iterator. + + Here is how to use the iterator to compute the sum of floating-point sparse matrix elements: + + \code + SparseMatConstIterator it = m.begin(), it_end = m.end(); + double s = 0; + CV_Assert( m.type() == CV_32F ); + for( ; it != it_end; ++it ) + s += it.value(); + \endcode +*/ +class CV_EXPORTS SparseMatConstIterator +{ +public: + //! the default constructor + SparseMatConstIterator(); + //! the full constructor setting the iterator to the first sparse matrix element + SparseMatConstIterator(const SparseMat* _m); + //! the copy constructor + SparseMatConstIterator(const SparseMatConstIterator& it); + + //! the assignment operator + SparseMatConstIterator& operator = (const SparseMatConstIterator& it); + + //! template method returning the current matrix element + template const _Tp& value() const; + //! returns the current node of the sparse matrix. it.node->idx is the current element index + const SparseMat::Node* node() const; + + //! moves iterator to the previous element + SparseMatConstIterator& operator --(); + //! moves iterator to the previous element + SparseMatConstIterator operator --(int); + //! moves iterator to the next element + SparseMatConstIterator& operator ++(); + //! moves iterator to the next element + SparseMatConstIterator operator ++(int); + + //! moves iterator to the element after the last element + void seekEnd(); + + const SparseMat* m; + size_t hashidx; + uchar* ptr; +}; + + + +////////////////////////////////// SparseMatIterator ///////////////////////////////// + +/** @brief Read-write Sparse Matrix Iterator + + The class is similar to cv::SparseMatConstIterator, + but can be used for in-place modification of the matrix elements. +*/ +class CV_EXPORTS SparseMatIterator : public SparseMatConstIterator +{ +public: + //! the default constructor + SparseMatIterator(); + //! the full constructor setting the iterator to the first sparse matrix element + SparseMatIterator(SparseMat* _m); + //! the full constructor setting the iterator to the specified sparse matrix element + SparseMatIterator(SparseMat* _m, const int* idx); + //! the copy constructor + SparseMatIterator(const SparseMatIterator& it); + + //! the assignment operator + SparseMatIterator& operator = (const SparseMatIterator& it); + //! returns read-write reference to the current sparse matrix element + template _Tp& value() const; + //! returns pointer to the current sparse matrix node. it.node->idx is the index of the current element (do not modify it!) + SparseMat::Node* node() const; + + //! moves iterator to the next element + SparseMatIterator& operator ++(); + //! moves iterator to the next element + SparseMatIterator operator ++(int); +}; + + + +/////////////////////////////// SparseMatConstIterator_ ////////////////////////////// + +/** @brief Template Read-Only Sparse Matrix Iterator Class. + + This is the derived from SparseMatConstIterator class that + introduces more convenient operator *() for accessing the current element. +*/ +template class SparseMatConstIterator_ : public SparseMatConstIterator +{ +public: + + typedef std::forward_iterator_tag iterator_category; + + //! the default constructor + SparseMatConstIterator_(); + //! the full constructor setting the iterator to the first sparse matrix element + SparseMatConstIterator_(const SparseMat_<_Tp>* _m); + SparseMatConstIterator_(const SparseMat* _m); + //! the copy constructor + SparseMatConstIterator_(const SparseMatConstIterator_& it); + + //! the assignment operator + SparseMatConstIterator_& operator = (const SparseMatConstIterator_& it); + //! the element access operator + const _Tp& operator *() const; + + //! moves iterator to the next element + SparseMatConstIterator_& operator ++(); + //! moves iterator to the next element + SparseMatConstIterator_ operator ++(int); +}; + + + +///////////////////////////////// SparseMatIterator_ ///////////////////////////////// + +/** @brief Template Read-Write Sparse Matrix Iterator Class. + + This is the derived from cv::SparseMatConstIterator_ class that + introduces more convenient operator *() for accessing the current element. +*/ +template class SparseMatIterator_ : public SparseMatConstIterator_<_Tp> +{ +public: + + typedef std::forward_iterator_tag iterator_category; + + //! the default constructor + SparseMatIterator_(); + //! the full constructor setting the iterator to the first sparse matrix element + SparseMatIterator_(SparseMat_<_Tp>* _m); + SparseMatIterator_(SparseMat* _m); + //! the copy constructor + SparseMatIterator_(const SparseMatIterator_& it); + + //! the assignment operator + SparseMatIterator_& operator = (const SparseMatIterator_& it); + //! returns the reference to the current element + _Tp& operator *() const; + + //! moves the iterator to the next element + SparseMatIterator_& operator ++(); + //! moves the iterator to the next element + SparseMatIterator_ operator ++(int); +}; + + + +/////////////////////////////////// NAryMatIterator ////////////////////////////////// + +/** @brief n-ary multi-dimensional array iterator. + +Use the class to implement unary, binary, and, generally, n-ary element-wise operations on +multi-dimensional arrays. Some of the arguments of an n-ary function may be continuous arrays, some +may be not. It is possible to use conventional MatIterator 's for each array but incrementing all of +the iterators after each small operations may be a big overhead. In this case consider using +NAryMatIterator to iterate through several matrices simultaneously as long as they have the same +geometry (dimensionality and all the dimension sizes are the same). On each iteration `it.planes[0]`, +`it.planes[1]`,... will be the slices of the corresponding matrices. + +The example below illustrates how you can compute a normalized and threshold 3D color histogram: +@code + void computeNormalizedColorHist(const Mat& image, Mat& hist, int N, double minProb) + { + const int histSize[] = {N, N, N}; + + // make sure that the histogram has a proper size and type + hist.create(3, histSize, CV_32F); + + // and clear it + hist = Scalar(0); + + // the loop below assumes that the image + // is a 8-bit 3-channel. check it. + CV_Assert(image.type() == CV_8UC3); + MatConstIterator_ it = image.begin(), + it_end = image.end(); + for( ; it != it_end; ++it ) + { + const Vec3b& pix = *it; + hist.at(pix[0]*N/256, pix[1]*N/256, pix[2]*N/256) += 1.f; + } + + minProb *= image.rows*image.cols; + + // initialize iterator (the style is different from STL). + // after initialization the iterator will contain + // the number of slices or planes the iterator will go through. + // it simultaneously increments iterators for several matrices + // supplied as a null terminated list of pointers + const Mat* arrays[] = {&hist, 0}; + Mat planes[1]; + NAryMatIterator itNAry(arrays, planes, 1); + double s = 0; + // iterate through the matrix. on each iteration + // itNAry.planes[i] (of type Mat) will be set to the current plane + // of the i-th n-dim matrix passed to the iterator constructor. + for(int p = 0; p < itNAry.nplanes; p++, ++itNAry) + { + threshold(itNAry.planes[0], itNAry.planes[0], minProb, 0, THRESH_TOZERO); + s += sum(itNAry.planes[0])[0]; + } + + s = 1./s; + itNAry = NAryMatIterator(arrays, planes, 1); + for(int p = 0; p < itNAry.nplanes; p++, ++itNAry) + itNAry.planes[0] *= s; + } +@endcode + */ +class CV_EXPORTS NAryMatIterator +{ +public: + //! the default constructor + NAryMatIterator(); + //! the full constructor taking arbitrary number of n-dim matrices + NAryMatIterator(const Mat** arrays, uchar** ptrs, int narrays=-1); + //! the full constructor taking arbitrary number of n-dim matrices + NAryMatIterator(const Mat** arrays, Mat* planes, int narrays=-1); + //! the separate iterator initialization method + void init(const Mat** arrays, Mat* planes, uchar** ptrs, int narrays=-1); + + //! proceeds to the next plane of every iterated matrix + NAryMatIterator& operator ++(); + //! proceeds to the next plane of every iterated matrix (postfix increment operator) + NAryMatIterator operator ++(int); + + //! the iterated arrays + const Mat** arrays; + //! the current planes + Mat* planes; + //! data pointers + uchar** ptrs; + //! the number of arrays + int narrays; + //! the number of hyper-planes that the iterator steps through + size_t nplanes; + //! the size of each segment (in elements) + size_t size; +protected: + int iterdepth; + size_t idx; +}; + + + +///////////////////////////////// Matrix Expressions ///////////////////////////////// + +class CV_EXPORTS MatOp +{ +public: + MatOp(); + virtual ~MatOp(); + + virtual bool elementWise(const MatExpr& expr) const; + virtual void assign(const MatExpr& expr, Mat& m, int type=-1) const = 0; + virtual void roi(const MatExpr& expr, const Range& rowRange, + const Range& colRange, MatExpr& res) const; + virtual void diag(const MatExpr& expr, int d, MatExpr& res) const; + virtual void augAssignAdd(const MatExpr& expr, Mat& m) const; + virtual void augAssignSubtract(const MatExpr& expr, Mat& m) const; + virtual void augAssignMultiply(const MatExpr& expr, Mat& m) const; + virtual void augAssignDivide(const MatExpr& expr, Mat& m) const; + virtual void augAssignAnd(const MatExpr& expr, Mat& m) const; + virtual void augAssignOr(const MatExpr& expr, Mat& m) const; + virtual void augAssignXor(const MatExpr& expr, Mat& m) const; + + virtual void add(const MatExpr& expr1, const MatExpr& expr2, MatExpr& res) const; + virtual void add(const MatExpr& expr1, const Scalar& s, MatExpr& res) const; + + virtual void subtract(const MatExpr& expr1, const MatExpr& expr2, MatExpr& res) const; + virtual void subtract(const Scalar& s, const MatExpr& expr, MatExpr& res) const; + + virtual void multiply(const MatExpr& expr1, const MatExpr& expr2, MatExpr& res, double scale=1) const; + virtual void multiply(const MatExpr& expr1, double s, MatExpr& res) const; + + virtual void divide(const MatExpr& expr1, const MatExpr& expr2, MatExpr& res, double scale=1) const; + virtual void divide(double s, const MatExpr& expr, MatExpr& res) const; + + virtual void abs(const MatExpr& expr, MatExpr& res) const; + + virtual void transpose(const MatExpr& expr, MatExpr& res) const; + virtual void matmul(const MatExpr& expr1, const MatExpr& expr2, MatExpr& res) const; + virtual void invert(const MatExpr& expr, int method, MatExpr& res) const; + + virtual Size size(const MatExpr& expr) const; + virtual int type(const MatExpr& expr) const; +}; + +/** @brief Matrix expression representation +@anchor MatrixExpressions +This is a list of implemented matrix operations that can be combined in arbitrary complex +expressions (here A, B stand for matrices ( Mat ), s for a scalar ( Scalar ), alpha for a +real-valued scalar ( double )): +- Addition, subtraction, negation: `A+B`, `A-B`, `A+s`, `A-s`, `s+A`, `s-A`, `-A` +- Scaling: `A*alpha` +- Per-element multiplication and division: `A.mul(B)`, `A/B`, `alpha/A` +- Matrix multiplication: `A*B` +- Transposition: `A.t()` (means AT) +- Matrix inversion and pseudo-inversion, solving linear systems and least-squares problems: + `A.inv([method]) (~ A-1)`, `A.inv([method])*B (~ X: AX=B)` +- Comparison: `A cmpop B`, `A cmpop alpha`, `alpha cmpop A`, where *cmpop* is one of + `>`, `>=`, `==`, `!=`, `<=`, `<`. The result of comparison is an 8-bit single channel mask whose + elements are set to 255 (if the particular element or pair of elements satisfy the condition) or + 0. +- Bitwise logical operations: `A logicop B`, `A logicop s`, `s logicop A`, `~A`, where *logicop* is one of + `&`, `|`, `^`. +- Element-wise minimum and maximum: `min(A, B)`, `min(A, alpha)`, `max(A, B)`, `max(A, alpha)` +- Element-wise absolute value: `abs(A)` +- Cross-product, dot-product: `A.cross(B)`, `A.dot(B)` +- Any function of matrix or matrices and scalars that returns a matrix or a scalar, such as norm, + mean, sum, countNonZero, trace, determinant, repeat, and others. +- Matrix initializers ( Mat::eye(), Mat::zeros(), Mat::ones() ), matrix comma-separated + initializers, matrix constructors and operators that extract sub-matrices (see Mat description). +- Mat_() constructors to cast the result to the proper type. +@note Comma-separated initializers and probably some other operations may require additional +explicit Mat() or Mat_() constructor calls to resolve a possible ambiguity. + +Here are examples of matrix expressions: +@code + // compute pseudo-inverse of A, equivalent to A.inv(DECOMP_SVD) + SVD svd(A); + Mat pinvA = svd.vt.t()*Mat::diag(1./svd.w)*svd.u.t(); + + // compute the new vector of parameters in the Levenberg-Marquardt algorithm + x -= (A.t()*A + lambda*Mat::eye(A.cols,A.cols,A.type())).inv(DECOMP_CHOLESKY)*(A.t()*err); + + // sharpen image using "unsharp mask" algorithm + Mat blurred; double sigma = 1, threshold = 5, amount = 1; + GaussianBlur(img, blurred, Size(), sigma, sigma); + Mat lowContrastMask = abs(img - blurred) < threshold; + Mat sharpened = img*(1+amount) + blurred*(-amount); + img.copyTo(sharpened, lowContrastMask); +@endcode +*/ +class CV_EXPORTS MatExpr +{ +public: + MatExpr(); + explicit MatExpr(const Mat& m); + + MatExpr(const MatOp* _op, int _flags, const Mat& _a = Mat(), const Mat& _b = Mat(), + const Mat& _c = Mat(), double _alpha = 1, double _beta = 1, const Scalar& _s = Scalar()); + + operator Mat() const; + template operator Mat_<_Tp>() const; + + Size size() const; + int type() const; + + MatExpr row(int y) const; + MatExpr col(int x) const; + MatExpr diag(int d = 0) const; + MatExpr operator()( const Range& rowRange, const Range& colRange ) const; + MatExpr operator()( const Rect& roi ) const; + + MatExpr t() const; + MatExpr inv(int method = DECOMP_LU) const; + MatExpr mul(const MatExpr& e, double scale=1) const; + MatExpr mul(const Mat& m, double scale=1) const; + + Mat cross(const Mat& m) const; + double dot(const Mat& m) const; + + void swap(MatExpr& b); + + const MatOp* op; + int flags; + + Mat a, b, c; + double alpha, beta; + Scalar s; +}; + +//! @} core_basic + +//! @relates cv::MatExpr +//! @{ +CV_EXPORTS MatExpr operator + (const Mat& a, const Mat& b); +CV_EXPORTS MatExpr operator + (const Mat& a, const Scalar& s); +CV_EXPORTS MatExpr operator + (const Scalar& s, const Mat& a); +CV_EXPORTS MatExpr operator + (const MatExpr& e, const Mat& m); +CV_EXPORTS MatExpr operator + (const Mat& m, const MatExpr& e); +CV_EXPORTS MatExpr operator + (const MatExpr& e, const Scalar& s); +CV_EXPORTS MatExpr operator + (const Scalar& s, const MatExpr& e); +CV_EXPORTS MatExpr operator + (const MatExpr& e1, const MatExpr& e2); +template static inline +MatExpr operator + (const Mat& a, const Matx<_Tp, m, n>& b) { return a + Mat(b); } +template static inline +MatExpr operator + (const Matx<_Tp, m, n>& a, const Mat& b) { return Mat(a) + b; } + +CV_EXPORTS MatExpr operator - (const Mat& a, const Mat& b); +CV_EXPORTS MatExpr operator - (const Mat& a, const Scalar& s); +CV_EXPORTS MatExpr operator - (const Scalar& s, const Mat& a); +CV_EXPORTS MatExpr operator - (const MatExpr& e, const Mat& m); +CV_EXPORTS MatExpr operator - (const Mat& m, const MatExpr& e); +CV_EXPORTS MatExpr operator - (const MatExpr& e, const Scalar& s); +CV_EXPORTS MatExpr operator - (const Scalar& s, const MatExpr& e); +CV_EXPORTS MatExpr operator - (const MatExpr& e1, const MatExpr& e2); +template static inline +MatExpr operator - (const Mat& a, const Matx<_Tp, m, n>& b) { return a - Mat(b); } +template static inline +MatExpr operator - (const Matx<_Tp, m, n>& a, const Mat& b) { return Mat(a) - b; } + +CV_EXPORTS MatExpr operator - (const Mat& m); +CV_EXPORTS MatExpr operator - (const MatExpr& e); + +CV_EXPORTS MatExpr operator * (const Mat& a, const Mat& b); +CV_EXPORTS MatExpr operator * (const Mat& a, double s); +CV_EXPORTS MatExpr operator * (double s, const Mat& a); +CV_EXPORTS MatExpr operator * (const MatExpr& e, const Mat& m); +CV_EXPORTS MatExpr operator * (const Mat& m, const MatExpr& e); +CV_EXPORTS MatExpr operator * (const MatExpr& e, double s); +CV_EXPORTS MatExpr operator * (double s, const MatExpr& e); +CV_EXPORTS MatExpr operator * (const MatExpr& e1, const MatExpr& e2); +template static inline +MatExpr operator * (const Mat& a, const Matx<_Tp, m, n>& b) { return a * Mat(b); } +template static inline +MatExpr operator * (const Matx<_Tp, m, n>& a, const Mat& b) { return Mat(a) * b; } + +CV_EXPORTS MatExpr operator / (const Mat& a, const Mat& b); +CV_EXPORTS MatExpr operator / (const Mat& a, double s); +CV_EXPORTS MatExpr operator / (double s, const Mat& a); +CV_EXPORTS MatExpr operator / (const MatExpr& e, const Mat& m); +CV_EXPORTS MatExpr operator / (const Mat& m, const MatExpr& e); +CV_EXPORTS MatExpr operator / (const MatExpr& e, double s); +CV_EXPORTS MatExpr operator / (double s, const MatExpr& e); +CV_EXPORTS MatExpr operator / (const MatExpr& e1, const MatExpr& e2); +template static inline +MatExpr operator / (const Mat& a, const Matx<_Tp, m, n>& b) { return a / Mat(b); } +template static inline +MatExpr operator / (const Matx<_Tp, m, n>& a, const Mat& b) { return Mat(a) / b; } + +CV_EXPORTS MatExpr operator < (const Mat& a, const Mat& b); +CV_EXPORTS MatExpr operator < (const Mat& a, double s); +CV_EXPORTS MatExpr operator < (double s, const Mat& a); +template static inline +MatExpr operator < (const Mat& a, const Matx<_Tp, m, n>& b) { return a < Mat(b); } +template static inline +MatExpr operator < (const Matx<_Tp, m, n>& a, const Mat& b) { return Mat(a) < b; } + +CV_EXPORTS MatExpr operator <= (const Mat& a, const Mat& b); +CV_EXPORTS MatExpr operator <= (const Mat& a, double s); +CV_EXPORTS MatExpr operator <= (double s, const Mat& a); +template static inline +MatExpr operator <= (const Mat& a, const Matx<_Tp, m, n>& b) { return a <= Mat(b); } +template static inline +MatExpr operator <= (const Matx<_Tp, m, n>& a, const Mat& b) { return Mat(a) <= b; } + +CV_EXPORTS MatExpr operator == (const Mat& a, const Mat& b); +CV_EXPORTS MatExpr operator == (const Mat& a, double s); +CV_EXPORTS MatExpr operator == (double s, const Mat& a); +template static inline +MatExpr operator == (const Mat& a, const Matx<_Tp, m, n>& b) { return a == Mat(b); } +template static inline +MatExpr operator == (const Matx<_Tp, m, n>& a, const Mat& b) { return Mat(a) == b; } + +CV_EXPORTS MatExpr operator != (const Mat& a, const Mat& b); +CV_EXPORTS MatExpr operator != (const Mat& a, double s); +CV_EXPORTS MatExpr operator != (double s, const Mat& a); +template static inline +MatExpr operator != (const Mat& a, const Matx<_Tp, m, n>& b) { return a != Mat(b); } +template static inline +MatExpr operator != (const Matx<_Tp, m, n>& a, const Mat& b) { return Mat(a) != b; } + +CV_EXPORTS MatExpr operator >= (const Mat& a, const Mat& b); +CV_EXPORTS MatExpr operator >= (const Mat& a, double s); +CV_EXPORTS MatExpr operator >= (double s, const Mat& a); +template static inline +MatExpr operator >= (const Mat& a, const Matx<_Tp, m, n>& b) { return a >= Mat(b); } +template static inline +MatExpr operator >= (const Matx<_Tp, m, n>& a, const Mat& b) { return Mat(a) >= b; } + +CV_EXPORTS MatExpr operator > (const Mat& a, const Mat& b); +CV_EXPORTS MatExpr operator > (const Mat& a, double s); +CV_EXPORTS MatExpr operator > (double s, const Mat& a); +template static inline +MatExpr operator > (const Mat& a, const Matx<_Tp, m, n>& b) { return a > Mat(b); } +template static inline +MatExpr operator > (const Matx<_Tp, m, n>& a, const Mat& b) { return Mat(a) > b; } + +CV_EXPORTS MatExpr operator & (const Mat& a, const Mat& b); +CV_EXPORTS MatExpr operator & (const Mat& a, const Scalar& s); +CV_EXPORTS MatExpr operator & (const Scalar& s, const Mat& a); +template static inline +MatExpr operator & (const Mat& a, const Matx<_Tp, m, n>& b) { return a & Mat(b); } +template static inline +MatExpr operator & (const Matx<_Tp, m, n>& a, const Mat& b) { return Mat(a) & b; } + +CV_EXPORTS MatExpr operator | (const Mat& a, const Mat& b); +CV_EXPORTS MatExpr operator | (const Mat& a, const Scalar& s); +CV_EXPORTS MatExpr operator | (const Scalar& s, const Mat& a); +template static inline +MatExpr operator | (const Mat& a, const Matx<_Tp, m, n>& b) { return a | Mat(b); } +template static inline +MatExpr operator | (const Matx<_Tp, m, n>& a, const Mat& b) { return Mat(a) | b; } + +CV_EXPORTS MatExpr operator ^ (const Mat& a, const Mat& b); +CV_EXPORTS MatExpr operator ^ (const Mat& a, const Scalar& s); +CV_EXPORTS MatExpr operator ^ (const Scalar& s, const Mat& a); +template static inline +MatExpr operator ^ (const Mat& a, const Matx<_Tp, m, n>& b) { return a ^ Mat(b); } +template static inline +MatExpr operator ^ (const Matx<_Tp, m, n>& a, const Mat& b) { return Mat(a) ^ b; } + +CV_EXPORTS MatExpr operator ~(const Mat& m); + +CV_EXPORTS MatExpr min(const Mat& a, const Mat& b); +CV_EXPORTS MatExpr min(const Mat& a, double s); +CV_EXPORTS MatExpr min(double s, const Mat& a); +template static inline +MatExpr min (const Mat& a, const Matx<_Tp, m, n>& b) { return min(a, Mat(b)); } +template static inline +MatExpr min (const Matx<_Tp, m, n>& a, const Mat& b) { return min(Mat(a), b); } + +CV_EXPORTS MatExpr max(const Mat& a, const Mat& b); +CV_EXPORTS MatExpr max(const Mat& a, double s); +CV_EXPORTS MatExpr max(double s, const Mat& a); +template static inline +MatExpr max (const Mat& a, const Matx<_Tp, m, n>& b) { return max(a, Mat(b)); } +template static inline +MatExpr max (const Matx<_Tp, m, n>& a, const Mat& b) { return max(Mat(a), b); } + +/** @brief Calculates an absolute value of each matrix element. + +abs is a meta-function that is expanded to one of absdiff or convertScaleAbs forms: +- C = abs(A-B) is equivalent to `absdiff(A, B, C)` +- C = abs(A) is equivalent to `absdiff(A, Scalar::all(0), C)` +- C = `Mat_ >(abs(A*alpha + beta))` is equivalent to `convertScaleAbs(A, C, alpha, +beta)` + +The output matrix has the same size and the same type as the input one except for the last case, +where C is depth=CV_8U . +@param m matrix. +@sa @ref MatrixExpressions, absdiff, convertScaleAbs + */ +CV_EXPORTS MatExpr abs(const Mat& m); +/** @overload +@param e matrix expression. +*/ +CV_EXPORTS MatExpr abs(const MatExpr& e); +//! @} relates cv::MatExpr + +} // cv + +#include "opencv2/core/mat.inl.hpp" + +#endif // OPENCV_CORE_MAT_HPP diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/mat.inl.hpp b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/mat.inl.hpp new file mode 100644 index 0000000..d6296f8 --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/mat.inl.hpp @@ -0,0 +1,3331 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Copyright (C) 2013, OpenCV Foundation, all rights reserved. +// Copyright (C) 2015, Itseez Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef OPENCV_CORE_MATRIX_OPERATIONS_HPP +#define OPENCV_CORE_MATRIX_OPERATIONS_HPP + +#ifndef __cplusplus +# error mat.inl.hpp header must be compiled as C++ +#endif + +#ifdef _MSC_VER +#pragma warning( push ) +#pragma warning( disable: 4127 ) +#endif + +#if defined(CV_SKIP_DISABLE_CLANG_ENUM_WARNINGS) + // nothing +#elif defined(CV_FORCE_DISABLE_CLANG_ENUM_WARNINGS) + #define CV_DISABLE_CLANG_ENUM_WARNINGS +#elif defined(__clang__) && defined(__has_warning) + #if __has_warning("-Wdeprecated-enum-enum-conversion") && __has_warning("-Wdeprecated-anon-enum-enum-conversion") + #define CV_DISABLE_CLANG_ENUM_WARNINGS + #endif +#endif +#ifdef CV_DISABLE_CLANG_ENUM_WARNINGS +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wdeprecated-enum-enum-conversion" +#pragma clang diagnostic ignored "-Wdeprecated-anon-enum-enum-conversion" +#endif + +namespace cv +{ +CV__DEBUG_NS_BEGIN + + +//! @cond IGNORED + +////////////////////////// Custom (raw) type wrapper ////////////////////////// + +template static inline +int rawType() +{ + CV_StaticAssert(sizeof(_Tp) <= CV_CN_MAX, "sizeof(_Tp) is too large"); + const int elemSize = sizeof(_Tp); + return (int)CV_MAKETYPE(CV_8U, elemSize); +} + +//////////////////////// Input/Output Arrays //////////////////////// + +inline void _InputArray::init(int _flags, const void* _obj) +{ flags = _flags; obj = (void*)_obj; } + +inline void _InputArray::init(int _flags, const void* _obj, Size _sz) +{ flags = _flags; obj = (void*)_obj; sz = _sz; } + +inline void* _InputArray::getObj() const { return obj; } +inline int _InputArray::getFlags() const { return flags; } +inline Size _InputArray::getSz() const { return sz; } + +inline _InputArray::_InputArray() { init(0 + NONE, 0); } +inline _InputArray::_InputArray(int _flags, void* _obj) { init(_flags, _obj); } +inline _InputArray::_InputArray(const Mat& m) { init(MAT+ACCESS_READ, &m); } +inline _InputArray::_InputArray(const std::vector& vec) { init(STD_VECTOR_MAT+ACCESS_READ, &vec); } +inline _InputArray::_InputArray(const UMat& m) { init(UMAT+ACCESS_READ, &m); } +inline _InputArray::_InputArray(const std::vector& vec) { init(STD_VECTOR_UMAT+ACCESS_READ, &vec); } + +template inline +_InputArray::_InputArray(const std::vector<_Tp>& vec) +{ init(FIXED_TYPE + STD_VECTOR + traits::Type<_Tp>::value + ACCESS_READ, &vec); } + +template inline +_InputArray::_InputArray(const std::array<_Tp, _Nm>& arr) +{ init(FIXED_TYPE + FIXED_SIZE + STD_ARRAY + traits::Type<_Tp>::value + ACCESS_READ, arr.data(), Size(1, _Nm)); } + +template inline +_InputArray::_InputArray(const std::array& arr) +{ init(STD_ARRAY_MAT + ACCESS_READ, arr.data(), Size(1, _Nm)); } + +inline +_InputArray::_InputArray(const std::vector& vec) +{ init(FIXED_TYPE + STD_BOOL_VECTOR + traits::Type::value + ACCESS_READ, &vec); } + +template inline +_InputArray::_InputArray(const std::vector >& vec) +{ init(FIXED_TYPE + STD_VECTOR_VECTOR + traits::Type<_Tp>::value + ACCESS_READ, &vec); } + +template inline +_InputArray::_InputArray(const std::vector >& vec) +{ init(FIXED_TYPE + STD_VECTOR_MAT + traits::Type<_Tp>::value + ACCESS_READ, &vec); } + +template inline +_InputArray::_InputArray(const Matx<_Tp, m, n>& mtx) +{ init(FIXED_TYPE + FIXED_SIZE + MATX + traits::Type<_Tp>::value + ACCESS_READ, &mtx, Size(n, m)); } + +template inline +_InputArray::_InputArray(const _Tp* vec, int n) +{ init(FIXED_TYPE + FIXED_SIZE + MATX + traits::Type<_Tp>::value + ACCESS_READ, vec, Size(n, 1)); } + +template inline +_InputArray::_InputArray(const Mat_<_Tp>& m) +{ init(FIXED_TYPE + MAT + traits::Type<_Tp>::value + ACCESS_READ, &m); } + +inline _InputArray::_InputArray(const double& val) +{ init(FIXED_TYPE + FIXED_SIZE + MATX + CV_64F + ACCESS_READ, &val, Size(1,1)); } + +inline _InputArray::_InputArray(const cuda::GpuMat& d_mat) +{ init(CUDA_GPU_MAT + ACCESS_READ, &d_mat); } + +inline _InputArray::_InputArray(const std::vector& d_mat) +{ init(STD_VECTOR_CUDA_GPU_MAT + ACCESS_READ, &d_mat);} + +inline _InputArray::_InputArray(const ogl::Buffer& buf) +{ init(OPENGL_BUFFER + ACCESS_READ, &buf); } + +inline _InputArray::_InputArray(const cuda::HostMem& cuda_mem) +{ init(CUDA_HOST_MEM + ACCESS_READ, &cuda_mem); } + +template inline +_InputArray _InputArray::rawIn(const std::vector<_Tp>& vec) +{ + _InputArray v; + v.flags = _InputArray::FIXED_TYPE + _InputArray::STD_VECTOR + rawType<_Tp>() + ACCESS_READ; + v.obj = (void*)&vec; + return v; +} + +template inline +_InputArray _InputArray::rawIn(const std::array<_Tp, _Nm>& arr) +{ + _InputArray v; + v.flags = FIXED_TYPE + FIXED_SIZE + STD_ARRAY + traits::Type<_Tp>::value + ACCESS_READ; + v.obj = (void*)arr.data(); + v.sz = Size(1, _Nm); + return v; +} + +inline _InputArray::~_InputArray() {} + +inline Mat _InputArray::getMat(int i) const +{ + if( kind() == MAT && i < 0 ) + return *(const Mat*)obj; + return getMat_(i); +} + +inline bool _InputArray::isMat() const { return kind() == _InputArray::MAT; } +inline bool _InputArray::isUMat() const { return kind() == _InputArray::UMAT; } +inline bool _InputArray::isMatVector() const { return kind() == _InputArray::STD_VECTOR_MAT; } +inline bool _InputArray::isUMatVector() const { return kind() == _InputArray::STD_VECTOR_UMAT; } +inline bool _InputArray::isMatx() const { return kind() == _InputArray::MATX; } +inline bool _InputArray::isVector() const { return kind() == _InputArray::STD_VECTOR || + kind() == _InputArray::STD_BOOL_VECTOR || + kind() == _InputArray::STD_ARRAY; } +inline bool _InputArray::isGpuMat() const { return kind() == _InputArray::CUDA_GPU_MAT; } +inline bool _InputArray::isGpuMatVector() const { return kind() == _InputArray::STD_VECTOR_CUDA_GPU_MAT; } + +//////////////////////////////////////////////////////////////////////////////////////// + +inline _OutputArray::_OutputArray() { init(NONE + ACCESS_WRITE, 0); } +inline _OutputArray::_OutputArray(int _flags, void* _obj) { init(_flags + ACCESS_WRITE, _obj); } +inline _OutputArray::_OutputArray(Mat& m) { init(MAT+ACCESS_WRITE, &m); } +inline _OutputArray::_OutputArray(std::vector& vec) { init(STD_VECTOR_MAT + ACCESS_WRITE, &vec); } +inline _OutputArray::_OutputArray(UMat& m) { init(UMAT + ACCESS_WRITE, &m); } +inline _OutputArray::_OutputArray(std::vector& vec) { init(STD_VECTOR_UMAT + ACCESS_WRITE, &vec); } + +template inline +_OutputArray::_OutputArray(std::vector<_Tp>& vec) +{ init(FIXED_TYPE + STD_VECTOR + traits::Type<_Tp>::value + ACCESS_WRITE, &vec); } + +template inline +_OutputArray::_OutputArray(std::array<_Tp, _Nm>& arr) +{ init(FIXED_TYPE + FIXED_SIZE + STD_ARRAY + traits::Type<_Tp>::value + ACCESS_WRITE, arr.data(), Size(1, _Nm)); } + +template inline +_OutputArray::_OutputArray(std::array& arr) +{ init(STD_ARRAY_MAT + ACCESS_WRITE, arr.data(), Size(1, _Nm)); } + +template inline +_OutputArray::_OutputArray(std::vector >& vec) +{ init(FIXED_TYPE + STD_VECTOR_VECTOR + traits::Type<_Tp>::value + ACCESS_WRITE, &vec); } + +template inline +_OutputArray::_OutputArray(std::vector >& vec) +{ init(FIXED_TYPE + STD_VECTOR_MAT + traits::Type<_Tp>::value + ACCESS_WRITE, &vec); } + +template inline +_OutputArray::_OutputArray(Mat_<_Tp>& m) +{ init(FIXED_TYPE + MAT + traits::Type<_Tp>::value + ACCESS_WRITE, &m); } + +template inline +_OutputArray::_OutputArray(Matx<_Tp, m, n>& mtx) +{ init(FIXED_TYPE + FIXED_SIZE + MATX + traits::Type<_Tp>::value + ACCESS_WRITE, &mtx, Size(n, m)); } + +template inline +_OutputArray::_OutputArray(_Tp* vec, int n) +{ init(FIXED_TYPE + FIXED_SIZE + MATX + traits::Type<_Tp>::value + ACCESS_WRITE, vec, Size(n, 1)); } + +template inline +_OutputArray::_OutputArray(const std::vector<_Tp>& vec) +{ init(FIXED_TYPE + FIXED_SIZE + STD_VECTOR + traits::Type<_Tp>::value + ACCESS_WRITE, &vec); } + +template inline +_OutputArray::_OutputArray(const std::array<_Tp, _Nm>& arr) +{ init(FIXED_TYPE + FIXED_SIZE + STD_ARRAY + traits::Type<_Tp>::value + ACCESS_WRITE, arr.data(), Size(1, _Nm)); } + +template inline +_OutputArray::_OutputArray(const std::array& arr) +{ init(FIXED_SIZE + STD_ARRAY_MAT + ACCESS_WRITE, arr.data(), Size(1, _Nm)); } + +template inline +_OutputArray::_OutputArray(const std::vector >& vec) +{ init(FIXED_TYPE + FIXED_SIZE + STD_VECTOR_VECTOR + traits::Type<_Tp>::value + ACCESS_WRITE, &vec); } + +template inline +_OutputArray::_OutputArray(const std::vector >& vec) +{ init(FIXED_TYPE + FIXED_SIZE + STD_VECTOR_MAT + traits::Type<_Tp>::value + ACCESS_WRITE, &vec); } + +template inline +_OutputArray::_OutputArray(const Mat_<_Tp>& m) +{ init(FIXED_TYPE + FIXED_SIZE + MAT + traits::Type<_Tp>::value + ACCESS_WRITE, &m); } + +template inline +_OutputArray::_OutputArray(const Matx<_Tp, m, n>& mtx) +{ init(FIXED_TYPE + FIXED_SIZE + MATX + traits::Type<_Tp>::value + ACCESS_WRITE, &mtx, Size(n, m)); } + +template inline +_OutputArray::_OutputArray(const _Tp* vec, int n) +{ init(FIXED_TYPE + FIXED_SIZE + MATX + traits::Type<_Tp>::value + ACCESS_WRITE, vec, Size(n, 1)); } + +inline _OutputArray::_OutputArray(cuda::GpuMat& d_mat) +{ init(CUDA_GPU_MAT + ACCESS_WRITE, &d_mat); } + +inline _OutputArray::_OutputArray(std::vector& d_mat) +{ init(STD_VECTOR_CUDA_GPU_MAT + ACCESS_WRITE, &d_mat);} + +inline _OutputArray::_OutputArray(ogl::Buffer& buf) +{ init(OPENGL_BUFFER + ACCESS_WRITE, &buf); } + +inline _OutputArray::_OutputArray(cuda::HostMem& cuda_mem) +{ init(CUDA_HOST_MEM + ACCESS_WRITE, &cuda_mem); } + +inline _OutputArray::_OutputArray(const Mat& m) +{ init(FIXED_TYPE + FIXED_SIZE + MAT + ACCESS_WRITE, &m); } + +inline _OutputArray::_OutputArray(const std::vector& vec) +{ init(FIXED_SIZE + STD_VECTOR_MAT + ACCESS_WRITE, &vec); } + +inline _OutputArray::_OutputArray(const UMat& m) +{ init(FIXED_TYPE + FIXED_SIZE + UMAT + ACCESS_WRITE, &m); } + +inline _OutputArray::_OutputArray(const std::vector& vec) +{ init(FIXED_SIZE + STD_VECTOR_UMAT + ACCESS_WRITE, &vec); } + +inline _OutputArray::_OutputArray(const cuda::GpuMat& d_mat) +{ init(FIXED_TYPE + FIXED_SIZE + CUDA_GPU_MAT + ACCESS_WRITE, &d_mat); } + + +inline _OutputArray::_OutputArray(const ogl::Buffer& buf) +{ init(FIXED_TYPE + FIXED_SIZE + OPENGL_BUFFER + ACCESS_WRITE, &buf); } + +inline _OutputArray::_OutputArray(const cuda::HostMem& cuda_mem) +{ init(FIXED_TYPE + FIXED_SIZE + CUDA_HOST_MEM + ACCESS_WRITE, &cuda_mem); } + +template inline +_OutputArray _OutputArray::rawOut(std::vector<_Tp>& vec) +{ + _OutputArray v; + v.flags = _InputArray::FIXED_TYPE + _InputArray::STD_VECTOR + rawType<_Tp>() + ACCESS_WRITE; + v.obj = (void*)&vec; + return v; +} + +template inline +_OutputArray _OutputArray::rawOut(std::array<_Tp, _Nm>& arr) +{ + _OutputArray v; + v.flags = FIXED_TYPE + FIXED_SIZE + STD_ARRAY + traits::Type<_Tp>::value + ACCESS_WRITE; + v.obj = (void*)arr.data(); + v.sz = Size(1, _Nm); + return v; +} + +/////////////////////////////////////////////////////////////////////////////////////////// + +inline _InputOutputArray::_InputOutputArray() { init(0+ACCESS_RW, 0); } +inline _InputOutputArray::_InputOutputArray(int _flags, void* _obj) { init(_flags+ACCESS_RW, _obj); } +inline _InputOutputArray::_InputOutputArray(Mat& m) { init(MAT+ACCESS_RW, &m); } +inline _InputOutputArray::_InputOutputArray(std::vector& vec) { init(STD_VECTOR_MAT+ACCESS_RW, &vec); } +inline _InputOutputArray::_InputOutputArray(UMat& m) { init(UMAT+ACCESS_RW, &m); } +inline _InputOutputArray::_InputOutputArray(std::vector& vec) { init(STD_VECTOR_UMAT+ACCESS_RW, &vec); } + +template inline +_InputOutputArray::_InputOutputArray(std::vector<_Tp>& vec) +{ init(FIXED_TYPE + STD_VECTOR + traits::Type<_Tp>::value + ACCESS_RW, &vec); } + +template inline +_InputOutputArray::_InputOutputArray(std::array<_Tp, _Nm>& arr) +{ init(FIXED_TYPE + FIXED_SIZE + STD_ARRAY + traits::Type<_Tp>::value + ACCESS_RW, arr.data(), Size(1, _Nm)); } + +template inline +_InputOutputArray::_InputOutputArray(std::array& arr) +{ init(STD_ARRAY_MAT + ACCESS_RW, arr.data(), Size(1, _Nm)); } + +template inline +_InputOutputArray::_InputOutputArray(std::vector >& vec) +{ init(FIXED_TYPE + STD_VECTOR_VECTOR + traits::Type<_Tp>::value + ACCESS_RW, &vec); } + +template inline +_InputOutputArray::_InputOutputArray(std::vector >& vec) +{ init(FIXED_TYPE + STD_VECTOR_MAT + traits::Type<_Tp>::value + ACCESS_RW, &vec); } + +template inline +_InputOutputArray::_InputOutputArray(Mat_<_Tp>& m) +{ init(FIXED_TYPE + MAT + traits::Type<_Tp>::value + ACCESS_RW, &m); } + +template inline +_InputOutputArray::_InputOutputArray(Matx<_Tp, m, n>& mtx) +{ init(FIXED_TYPE + FIXED_SIZE + MATX + traits::Type<_Tp>::value + ACCESS_RW, &mtx, Size(n, m)); } + +template inline +_InputOutputArray::_InputOutputArray(_Tp* vec, int n) +{ init(FIXED_TYPE + FIXED_SIZE + MATX + traits::Type<_Tp>::value + ACCESS_RW, vec, Size(n, 1)); } + +template inline +_InputOutputArray::_InputOutputArray(const std::vector<_Tp>& vec) +{ init(FIXED_TYPE + FIXED_SIZE + STD_VECTOR + traits::Type<_Tp>::value + ACCESS_RW, &vec); } + +template inline +_InputOutputArray::_InputOutputArray(const std::array<_Tp, _Nm>& arr) +{ init(FIXED_TYPE + FIXED_SIZE + STD_ARRAY + traits::Type<_Tp>::value + ACCESS_RW, arr.data(), Size(1, _Nm)); } + +template inline +_InputOutputArray::_InputOutputArray(const std::array& arr) +{ init(FIXED_SIZE + STD_ARRAY_MAT + ACCESS_RW, arr.data(), Size(1, _Nm)); } + +template inline +_InputOutputArray::_InputOutputArray(const std::vector >& vec) +{ init(FIXED_TYPE + FIXED_SIZE + STD_VECTOR_VECTOR + traits::Type<_Tp>::value + ACCESS_RW, &vec); } + +template inline +_InputOutputArray::_InputOutputArray(const std::vector >& vec) +{ init(FIXED_TYPE + FIXED_SIZE + STD_VECTOR_MAT + traits::Type<_Tp>::value + ACCESS_RW, &vec); } + +template inline +_InputOutputArray::_InputOutputArray(const Mat_<_Tp>& m) +{ init(FIXED_TYPE + FIXED_SIZE + MAT + traits::Type<_Tp>::value + ACCESS_RW, &m); } + +template inline +_InputOutputArray::_InputOutputArray(const Matx<_Tp, m, n>& mtx) +{ init(FIXED_TYPE + FIXED_SIZE + MATX + traits::Type<_Tp>::value + ACCESS_RW, &mtx, Size(n, m)); } + +template inline +_InputOutputArray::_InputOutputArray(const _Tp* vec, int n) +{ init(FIXED_TYPE + FIXED_SIZE + MATX + traits::Type<_Tp>::value + ACCESS_RW, vec, Size(n, 1)); } + +inline _InputOutputArray::_InputOutputArray(cuda::GpuMat& d_mat) +{ init(CUDA_GPU_MAT + ACCESS_RW, &d_mat); } + +inline _InputOutputArray::_InputOutputArray(ogl::Buffer& buf) +{ init(OPENGL_BUFFER + ACCESS_RW, &buf); } + +inline _InputOutputArray::_InputOutputArray(cuda::HostMem& cuda_mem) +{ init(CUDA_HOST_MEM + ACCESS_RW, &cuda_mem); } + +inline _InputOutputArray::_InputOutputArray(const Mat& m) +{ init(FIXED_TYPE + FIXED_SIZE + MAT + ACCESS_RW, &m); } + +inline _InputOutputArray::_InputOutputArray(const std::vector& vec) +{ init(FIXED_SIZE + STD_VECTOR_MAT + ACCESS_RW, &vec); } + +inline _InputOutputArray::_InputOutputArray(const UMat& m) +{ init(FIXED_TYPE + FIXED_SIZE + UMAT + ACCESS_RW, &m); } + +inline _InputOutputArray::_InputOutputArray(const std::vector& vec) +{ init(FIXED_SIZE + STD_VECTOR_UMAT + ACCESS_RW, &vec); } + +inline _InputOutputArray::_InputOutputArray(const cuda::GpuMat& d_mat) +{ init(FIXED_TYPE + FIXED_SIZE + CUDA_GPU_MAT + ACCESS_RW, &d_mat); } + +inline _InputOutputArray::_InputOutputArray(const std::vector& d_mat) +{ init(FIXED_TYPE + FIXED_SIZE + STD_VECTOR_CUDA_GPU_MAT + ACCESS_RW, &d_mat);} + +template<> inline _InputOutputArray::_InputOutputArray(std::vector& d_mat) +{ init(FIXED_TYPE + FIXED_SIZE + STD_VECTOR_CUDA_GPU_MAT + ACCESS_RW, &d_mat);} + +inline _InputOutputArray::_InputOutputArray(const ogl::Buffer& buf) +{ init(FIXED_TYPE + FIXED_SIZE + OPENGL_BUFFER + ACCESS_RW, &buf); } + +inline _InputOutputArray::_InputOutputArray(const cuda::HostMem& cuda_mem) +{ init(FIXED_TYPE + FIXED_SIZE + CUDA_HOST_MEM + ACCESS_RW, &cuda_mem); } + +template inline +_InputOutputArray _InputOutputArray::rawInOut(std::vector<_Tp>& vec) +{ + _InputOutputArray v; + v.flags = _InputArray::FIXED_TYPE + _InputArray::STD_VECTOR + rawType<_Tp>() + ACCESS_RW; + v.obj = (void*)&vec; + return v; +} + +template inline +_InputOutputArray _InputOutputArray::rawInOut(std::array<_Tp, _Nm>& arr) +{ + _InputOutputArray v; + v.flags = FIXED_TYPE + FIXED_SIZE + STD_ARRAY + traits::Type<_Tp>::value + ACCESS_RW; + v.obj = (void*)arr.data(); + v.sz = Size(1, _Nm); + return v; +} + + +template static inline _InputArray rawIn(_Tp& v) { return _InputArray::rawIn(v); } +template static inline _OutputArray rawOut(_Tp& v) { return _OutputArray::rawOut(v); } +template static inline _InputOutputArray rawInOut(_Tp& v) { return _InputOutputArray::rawInOut(v); } + +CV__DEBUG_NS_END + +//////////////////////////////////////////// Mat ////////////////////////////////////////// + +template inline +Mat::Mat(const std::vector<_Tp>& vec, bool copyData) + : flags(MAGIC_VAL + traits::Type<_Tp>::value + CV_MAT_CONT_FLAG), dims(2), rows((int)vec.size()), + cols(1), data(0), datastart(0), dataend(0), datalimit(0), allocator(0), u(0), size(&rows), step(0) +{ + if(vec.empty()) + return; + if( !copyData ) + { + step[0] = step[1] = sizeof(_Tp); + datastart = data = (uchar*)&vec[0]; + datalimit = dataend = datastart + rows * step[0]; + } + else + Mat((int)vec.size(), 1, traits::Type<_Tp>::value, (uchar*)&vec[0]).copyTo(*this); +} + +template inline +Mat::Mat(const std::initializer_list<_Tp> list) + : Mat() +{ + CV_Assert(list.size() != 0); + Mat((int)list.size(), 1, traits::Type<_Tp>::value, (uchar*)list.begin()).copyTo(*this); +} + +template inline +Mat::Mat(const std::initializer_list sizes, const std::initializer_list<_Tp> list) + : Mat() +{ + size_t size_total = 1; + for(auto s : sizes) + size_total *= s; + CV_Assert(list.size() != 0); + CV_Assert(size_total == list.size()); + Mat((int)sizes.size(), (int*)sizes.begin(), traits::Type<_Tp>::value, (uchar*)list.begin()).copyTo(*this); +} + +template inline +Mat::Mat(const std::array<_Tp, _Nm>& arr, bool copyData) + : flags(MAGIC_VAL + traits::Type<_Tp>::value + CV_MAT_CONT_FLAG), dims(2), rows((int)arr.size()), + cols(1), data(0), datastart(0), dataend(0), datalimit(0), allocator(0), u(0), size(&rows), step(0) +{ + if(arr.empty()) + return; + if( !copyData ) + { + step[0] = step[1] = sizeof(_Tp); + datastart = data = (uchar*)arr.data(); + datalimit = dataend = datastart + rows * step[0]; + } + else + Mat((int)arr.size(), 1, traits::Type<_Tp>::value, (uchar*)arr.data()).copyTo(*this); +} + +template inline +Mat::Mat(const Vec<_Tp, n>& vec, bool copyData) + : flags(MAGIC_VAL + traits::Type<_Tp>::value + CV_MAT_CONT_FLAG), dims(2), rows(n), cols(1), data(0), + datastart(0), dataend(0), datalimit(0), allocator(0), u(0), size(&rows), step(0) +{ + if( !copyData ) + { + step[0] = step[1] = sizeof(_Tp); + datastart = data = (uchar*)vec.val; + datalimit = dataend = datastart + rows * step[0]; + } + else + Mat(n, 1, traits::Type<_Tp>::value, (void*)vec.val).copyTo(*this); +} + + +template inline +Mat::Mat(const Matx<_Tp,m,n>& M, bool copyData) + : flags(MAGIC_VAL + traits::Type<_Tp>::value + CV_MAT_CONT_FLAG), dims(2), rows(m), cols(n), data(0), + datastart(0), dataend(0), datalimit(0), allocator(0), u(0), size(&rows), step(0) +{ + if( !copyData ) + { + step[0] = cols * sizeof(_Tp); + step[1] = sizeof(_Tp); + datastart = data = (uchar*)M.val; + datalimit = dataend = datastart + rows * step[0]; + } + else + Mat(m, n, traits::Type<_Tp>::value, (uchar*)M.val).copyTo(*this); +} + +template inline +Mat::Mat(const Point_<_Tp>& pt, bool copyData) + : flags(MAGIC_VAL + traits::Type<_Tp>::value + CV_MAT_CONT_FLAG), dims(2), rows(2), cols(1), data(0), + datastart(0), dataend(0), datalimit(0), allocator(0), u(0), size(&rows), step(0) +{ + if( !copyData ) + { + step[0] = step[1] = sizeof(_Tp); + datastart = data = (uchar*)&pt.x; + datalimit = dataend = datastart + rows * step[0]; + } + else + { + create(2, 1, traits::Type<_Tp>::value); + ((_Tp*)data)[0] = pt.x; + ((_Tp*)data)[1] = pt.y; + } +} + +template inline +Mat::Mat(const Point3_<_Tp>& pt, bool copyData) + : flags(MAGIC_VAL + traits::Type<_Tp>::value + CV_MAT_CONT_FLAG), dims(2), rows(3), cols(1), data(0), + datastart(0), dataend(0), datalimit(0), allocator(0), u(0), size(&rows), step(0) +{ + if( !copyData ) + { + step[0] = step[1] = sizeof(_Tp); + datastart = data = (uchar*)&pt.x; + datalimit = dataend = datastart + rows * step[0]; + } + else + { + create(3, 1, traits::Type<_Tp>::value); + ((_Tp*)data)[0] = pt.x; + ((_Tp*)data)[1] = pt.y; + ((_Tp*)data)[2] = pt.z; + } +} + +template inline +Mat::Mat(const MatCommaInitializer_<_Tp>& commaInitializer) + : flags(MAGIC_VAL + traits::Type<_Tp>::value + CV_MAT_CONT_FLAG), dims(0), rows(0), cols(0), data(0), + datastart(0), dataend(0), allocator(0), u(0), size(&rows) +{ + *this = commaInitializer.operator Mat_<_Tp>(); +} + +inline +Mat Mat::row(int y) const +{ + return Mat(*this, Range(y, y + 1), Range::all()); +} + +inline +Mat Mat::col(int x) const +{ + return Mat(*this, Range::all(), Range(x, x + 1)); +} + +inline +Mat Mat::rowRange(int startrow, int endrow) const +{ + return Mat(*this, Range(startrow, endrow), Range::all()); +} + +inline +Mat Mat::rowRange(const Range& r) const +{ + return Mat(*this, r, Range::all()); +} + +inline +Mat Mat::colRange(int startcol, int endcol) const +{ + return Mat(*this, Range::all(), Range(startcol, endcol)); +} + +inline +Mat Mat::colRange(const Range& r) const +{ + return Mat(*this, Range::all(), r); +} + +inline +Mat Mat::operator()( Range _rowRange, Range _colRange ) const +{ + return Mat(*this, _rowRange, _colRange); +} + +inline +Mat Mat::operator()( const Rect& roi ) const +{ + return Mat(*this, roi); +} + +inline +Mat Mat::operator()(const Range* ranges) const +{ + return Mat(*this, ranges); +} + +inline +Mat Mat::operator()(const std::vector& ranges) const +{ + return Mat(*this, ranges); +} + +inline +bool Mat::isContinuous() const +{ + return (flags & CONTINUOUS_FLAG) != 0; +} + +inline +bool Mat::isSubmatrix() const +{ + return (flags & SUBMATRIX_FLAG) != 0; +} + +inline +size_t Mat::elemSize() const +{ + size_t res = dims > 0 ? step.p[dims - 1] : 0; + CV_DbgAssert(res != 0); + return res; +} + +inline +size_t Mat::elemSize1() const +{ + return CV_ELEM_SIZE1(flags); +} + +inline +int Mat::type() const +{ + return CV_MAT_TYPE(flags); +} + +inline +int Mat::depth() const +{ + return CV_MAT_DEPTH(flags); +} + +inline +int Mat::channels() const +{ + return CV_MAT_CN(flags); +} + +inline +uchar* Mat::ptr(int y) +{ + CV_DbgAssert( y == 0 || (data && dims >= 1 && (unsigned)y < (unsigned)size.p[0]) ); + return data + step.p[0] * y; +} + +inline +const uchar* Mat::ptr(int y) const +{ + CV_DbgAssert( y == 0 || (data && dims >= 1 && (unsigned)y < (unsigned)size.p[0]) ); + return data + step.p[0] * y; +} + +template inline +_Tp* Mat::ptr(int y) +{ + CV_DbgAssert( y == 0 || (data && dims >= 1 && (unsigned)y < (unsigned)size.p[0]) ); + return (_Tp*)(data + step.p[0] * y); +} + +template inline +const _Tp* Mat::ptr(int y) const +{ + CV_DbgAssert( y == 0 || (data && dims >= 1 && (unsigned)y < (unsigned)size.p[0]) ); + return (const _Tp*)(data + step.p[0] * y); +} + +inline +uchar* Mat::ptr(int i0, int i1) +{ + CV_DbgAssert(dims >= 2); + CV_DbgAssert(data); + CV_DbgAssert((unsigned)i0 < (unsigned)size.p[0]); + CV_DbgAssert((unsigned)i1 < (unsigned)size.p[1]); + return data + i0 * step.p[0] + i1 * step.p[1]; +} + +inline +const uchar* Mat::ptr(int i0, int i1) const +{ + CV_DbgAssert(dims >= 2); + CV_DbgAssert(data); + CV_DbgAssert((unsigned)i0 < (unsigned)size.p[0]); + CV_DbgAssert((unsigned)i1 < (unsigned)size.p[1]); + return data + i0 * step.p[0] + i1 * step.p[1]; +} + +template inline +_Tp* Mat::ptr(int i0, int i1) +{ + CV_DbgAssert(dims >= 2); + CV_DbgAssert(data); + CV_DbgAssert((unsigned)i0 < (unsigned)size.p[0]); + CV_DbgAssert((unsigned)i1 < (unsigned)size.p[1]); + return (_Tp*)(data + i0 * step.p[0] + i1 * step.p[1]); +} + +template inline +const _Tp* Mat::ptr(int i0, int i1) const +{ + CV_DbgAssert(dims >= 2); + CV_DbgAssert(data); + CV_DbgAssert((unsigned)i0 < (unsigned)size.p[0]); + CV_DbgAssert((unsigned)i1 < (unsigned)size.p[1]); + return (const _Tp*)(data + i0 * step.p[0] + i1 * step.p[1]); +} + +inline +uchar* Mat::ptr(int i0, int i1, int i2) +{ + CV_DbgAssert(dims >= 3); + CV_DbgAssert(data); + CV_DbgAssert((unsigned)i0 < (unsigned)size.p[0]); + CV_DbgAssert((unsigned)i1 < (unsigned)size.p[1]); + CV_DbgAssert((unsigned)i2 < (unsigned)size.p[2]); + return data + i0 * step.p[0] + i1 * step.p[1] + i2 * step.p[2]; +} + +inline +const uchar* Mat::ptr(int i0, int i1, int i2) const +{ + CV_DbgAssert(dims >= 3); + CV_DbgAssert(data); + CV_DbgAssert((unsigned)i0 < (unsigned)size.p[0]); + CV_DbgAssert((unsigned)i1 < (unsigned)size.p[1]); + CV_DbgAssert((unsigned)i2 < (unsigned)size.p[2]); + return data + i0 * step.p[0] + i1 * step.p[1] + i2 * step.p[2]; +} + +template inline +_Tp* Mat::ptr(int i0, int i1, int i2) +{ + CV_DbgAssert(dims >= 3); + CV_DbgAssert(data); + CV_DbgAssert((unsigned)i0 < (unsigned)size.p[0]); + CV_DbgAssert((unsigned)i1 < (unsigned)size.p[1]); + CV_DbgAssert((unsigned)i2 < (unsigned)size.p[2]); + return (_Tp*)(data + i0 * step.p[0] + i1 * step.p[1] + i2 * step.p[2]); +} + +template inline +const _Tp* Mat::ptr(int i0, int i1, int i2) const +{ + CV_DbgAssert(dims >= 3); + CV_DbgAssert(data); + CV_DbgAssert((unsigned)i0 < (unsigned)size.p[0]); + CV_DbgAssert((unsigned)i1 < (unsigned)size.p[1]); + CV_DbgAssert((unsigned)i2 < (unsigned)size.p[2]); + return (const _Tp*)(data + i0 * step.p[0] + i1 * step.p[1] + i2 * step.p[2]); +} + +inline +uchar* Mat::ptr(const int* idx) +{ + int i, d = dims; + uchar* p = data; + CV_DbgAssert( d >= 1 && p ); + for( i = 0; i < d; i++ ) + { + CV_DbgAssert( (unsigned)idx[i] < (unsigned)size.p[i] ); + p += idx[i] * step.p[i]; + } + return p; +} + +inline +const uchar* Mat::ptr(const int* idx) const +{ + int i, d = dims; + uchar* p = data; + CV_DbgAssert( d >= 1 && p ); + for( i = 0; i < d; i++ ) + { + CV_DbgAssert( (unsigned)idx[i] < (unsigned)size.p[i] ); + p += idx[i] * step.p[i]; + } + return p; +} + +template inline +_Tp* Mat::ptr(const int* idx) +{ + int i, d = dims; + uchar* p = data; + CV_DbgAssert( d >= 1 && p ); + for( i = 0; i < d; i++ ) + { + CV_DbgAssert( (unsigned)idx[i] < (unsigned)size.p[i] ); + p += idx[i] * step.p[i]; + } + return (_Tp*)p; +} + +template inline +const _Tp* Mat::ptr(const int* idx) const +{ + int i, d = dims; + uchar* p = data; + CV_DbgAssert( d >= 1 && p ); + for( i = 0; i < d; i++ ) + { + CV_DbgAssert( (unsigned)idx[i] < (unsigned)size.p[i] ); + p += idx[i] * step.p[i]; + } + return (const _Tp*)p; +} + +template inline +_Tp& Mat::at(int i0, int i1) +{ + CV_DbgAssert(dims <= 2); + CV_DbgAssert(data); + CV_DbgAssert((unsigned)i0 < (unsigned)size.p[0]); + CV_DbgAssert((unsigned)(i1 * DataType<_Tp>::channels) < (unsigned)(size.p[1] * channels())); + CV_DbgAssert(CV_ELEM_SIZE1(traits::Depth<_Tp>::value) == elemSize1()); + return ((_Tp*)(data + step.p[0] * i0))[i1]; +} + +template inline +const _Tp& Mat::at(int i0, int i1) const +{ + CV_DbgAssert(dims <= 2); + CV_DbgAssert(data); + CV_DbgAssert((unsigned)i0 < (unsigned)size.p[0]); + CV_DbgAssert((unsigned)(i1 * DataType<_Tp>::channels) < (unsigned)(size.p[1] * channels())); + CV_DbgAssert(CV_ELEM_SIZE1(traits::Depth<_Tp>::value) == elemSize1()); + return ((const _Tp*)(data + step.p[0] * i0))[i1]; +} + +template inline +_Tp& Mat::at(Point pt) +{ + CV_DbgAssert(dims <= 2); + CV_DbgAssert(data); + CV_DbgAssert((unsigned)pt.y < (unsigned)size.p[0]); + CV_DbgAssert((unsigned)(pt.x * DataType<_Tp>::channels) < (unsigned)(size.p[1] * channels())); + CV_DbgAssert(CV_ELEM_SIZE1(traits::Depth<_Tp>::value) == elemSize1()); + return ((_Tp*)(data + step.p[0] * pt.y))[pt.x]; +} + +template inline +const _Tp& Mat::at(Point pt) const +{ + CV_DbgAssert(dims <= 2); + CV_DbgAssert(data); + CV_DbgAssert((unsigned)pt.y < (unsigned)size.p[0]); + CV_DbgAssert((unsigned)(pt.x * DataType<_Tp>::channels) < (unsigned)(size.p[1] * channels())); + CV_DbgAssert(CV_ELEM_SIZE1(traits::Depth<_Tp>::value) == elemSize1()); + return ((const _Tp*)(data + step.p[0] * pt.y))[pt.x]; +} + +template inline +_Tp& Mat::at(int i0) +{ + CV_DbgAssert(dims <= 2); + CV_DbgAssert(data); + CV_DbgAssert((unsigned)i0 < (unsigned)(size.p[0] * size.p[1])); + CV_DbgAssert(elemSize() == sizeof(_Tp)); + if( isContinuous() || size.p[0] == 1 ) + return ((_Tp*)data)[i0]; + if( size.p[1] == 1 ) + return *(_Tp*)(data + step.p[0] * i0); + int i = i0 / cols, j = i0 - i * cols; + return ((_Tp*)(data + step.p[0] * i))[j]; +} + +template inline +const _Tp& Mat::at(int i0) const +{ + CV_DbgAssert(dims <= 2); + CV_DbgAssert(data); + CV_DbgAssert((unsigned)i0 < (unsigned)(size.p[0] * size.p[1])); + CV_DbgAssert(elemSize() == sizeof(_Tp)); + if( isContinuous() || size.p[0] == 1 ) + return ((const _Tp*)data)[i0]; + if( size.p[1] == 1 ) + return *(const _Tp*)(data + step.p[0] * i0); + int i = i0 / cols, j = i0 - i * cols; + return ((const _Tp*)(data + step.p[0] * i))[j]; +} + +template inline +_Tp& Mat::at(int i0, int i1, int i2) +{ + CV_DbgAssert( elemSize() == sizeof(_Tp) ); + return *(_Tp*)ptr(i0, i1, i2); +} + +template inline +const _Tp& Mat::at(int i0, int i1, int i2) const +{ + CV_DbgAssert( elemSize() == sizeof(_Tp) ); + return *(const _Tp*)ptr(i0, i1, i2); +} + +template inline +_Tp& Mat::at(const int* idx) +{ + CV_DbgAssert( elemSize() == sizeof(_Tp) ); + return *(_Tp*)ptr(idx); +} + +template inline +const _Tp& Mat::at(const int* idx) const +{ + CV_DbgAssert( elemSize() == sizeof(_Tp) ); + return *(const _Tp*)ptr(idx); +} + +template inline +_Tp& Mat::at(const Vec& idx) +{ + CV_DbgAssert( elemSize() == sizeof(_Tp) ); + return *(_Tp*)ptr(idx.val); +} + +template inline +const _Tp& Mat::at(const Vec& idx) const +{ + CV_DbgAssert( elemSize() == sizeof(_Tp) ); + return *(const _Tp*)ptr(idx.val); +} + +template inline +MatConstIterator_<_Tp> Mat::begin() const +{ + if (empty()) + return MatConstIterator_<_Tp>(); + CV_DbgAssert( elemSize() == sizeof(_Tp) ); + return MatConstIterator_<_Tp>((const Mat_<_Tp>*)this); +} + +template inline +MatConstIterator_<_Tp> Mat::end() const +{ + if (empty()) + return MatConstIterator_<_Tp>(); + CV_DbgAssert( elemSize() == sizeof(_Tp) ); + MatConstIterator_<_Tp> it((const Mat_<_Tp>*)this); + it += total(); + return it; +} + +template inline +MatIterator_<_Tp> Mat::begin() +{ + if (empty()) + return MatIterator_<_Tp>(); + CV_DbgAssert( elemSize() == sizeof(_Tp) ); + return MatIterator_<_Tp>((Mat_<_Tp>*)this); +} + +template inline +MatIterator_<_Tp> Mat::end() +{ + if (empty()) + return MatIterator_<_Tp>(); + CV_DbgAssert( elemSize() == sizeof(_Tp) ); + MatIterator_<_Tp> it((Mat_<_Tp>*)this); + it += total(); + return it; +} + +template inline +void Mat::forEach(const Functor& operation) { + this->forEach_impl<_Tp>(operation); +} + +template inline +void Mat::forEach(const Functor& operation) const { + // call as not const + (const_cast(this))->forEach<_Tp>(operation); +} + +template inline +Mat::operator std::vector<_Tp>() const +{ + std::vector<_Tp> v; + copyTo(v); + return v; +} + +template inline +Mat::operator std::array<_Tp, _Nm>() const +{ + std::array<_Tp, _Nm> v; + copyTo(v); + return v; +} + +template inline +Mat::operator Vec<_Tp, n>() const +{ + CV_Assert( data && dims <= 2 && (rows == 1 || cols == 1) && + rows + cols - 1 == n && channels() == 1 ); + + if( isContinuous() && type() == traits::Type<_Tp>::value ) + return Vec<_Tp, n>((_Tp*)data); + Vec<_Tp, n> v; + Mat tmp(rows, cols, traits::Type<_Tp>::value, v.val); + convertTo(tmp, tmp.type()); + return v; +} + +template inline +Mat::operator Matx<_Tp, m, n>() const +{ + CV_Assert( data && dims <= 2 && rows == m && cols == n && channels() == 1 ); + + if( isContinuous() && type() == traits::Type<_Tp>::value ) + return Matx<_Tp, m, n>((_Tp*)data); + Matx<_Tp, m, n> mtx; + Mat tmp(rows, cols, traits::Type<_Tp>::value, mtx.val); + convertTo(tmp, tmp.type()); + return mtx; +} + +template inline +void Mat::push_back(const _Tp& elem) +{ + if( !data ) + { + *this = Mat(1, 1, traits::Type<_Tp>::value, (void*)&elem).clone(); + return; + } + CV_Assert(traits::Type<_Tp>::value == type() && cols == 1 + /* && dims == 2 (cols == 1 implies dims == 2) */); + const uchar* tmp = dataend + step[0]; + if( !isSubmatrix() && isContinuous() && tmp <= datalimit ) + { + *(_Tp*)(data + (size.p[0]++) * step.p[0]) = elem; + dataend = tmp; + } + else + push_back_(&elem); +} + +template inline +void Mat::push_back(const Mat_<_Tp>& m) +{ + push_back((const Mat&)m); +} + +template<> inline +void Mat::push_back(const MatExpr& expr) +{ + push_back(static_cast(expr)); +} + + +template inline +void Mat::push_back(const std::vector<_Tp>& v) +{ + push_back(Mat(v)); +} + + +///////////////////////////// MatSize //////////////////////////// + +inline +MatSize::MatSize(int* _p) + : p(_p) {} + +inline +int MatSize::dims() const +{ + return (p - 1)[0]; +} + +inline +Size MatSize::operator()() const +{ + CV_DbgAssert(dims() <= 2); + return Size(p[1], p[0]); +} + +inline +const int& MatSize::operator[](int i) const +{ + CV_DbgAssert(i < dims()); +#ifdef __OPENCV_BUILD + CV_DbgAssert(i >= 0); +#endif + return p[i]; +} + +inline +int& MatSize::operator[](int i) +{ + CV_DbgAssert(i < dims()); +#ifdef __OPENCV_BUILD + CV_DbgAssert(i >= 0); +#endif + return p[i]; +} + +inline +MatSize::operator const int*() const +{ + return p; +} + +inline +bool MatSize::operator != (const MatSize& sz) const +{ + return !(*this == sz); +} + + + +///////////////////////////// MatStep //////////////////////////// + +inline +MatStep::MatStep() +{ + p = buf; p[0] = p[1] = 0; +} + +inline +MatStep::MatStep(size_t s) +{ + p = buf; p[0] = s; p[1] = 0; +} + +inline +const size_t& MatStep::operator[](int i) const +{ + return p[i]; +} + +inline +size_t& MatStep::operator[](int i) +{ + return p[i]; +} + +inline MatStep::operator size_t() const +{ + CV_DbgAssert( p == buf ); + return buf[0]; +} + +inline MatStep& MatStep::operator = (size_t s) +{ + CV_DbgAssert( p == buf ); + buf[0] = s; + return *this; +} + + + +////////////////////////////// Mat_<_Tp> //////////////////////////// + +template inline +Mat_<_Tp>::Mat_() + : Mat() +{ + flags = (flags & ~CV_MAT_TYPE_MASK) + traits::Type<_Tp>::value; +} + +template inline +Mat_<_Tp>::Mat_(int _rows, int _cols) + : Mat(_rows, _cols, traits::Type<_Tp>::value) +{ +} + +template inline +Mat_<_Tp>::Mat_(int _rows, int _cols, const _Tp& value) + : Mat(_rows, _cols, traits::Type<_Tp>::value) +{ + *this = value; +} + +template inline +Mat_<_Tp>::Mat_(Size _sz) + : Mat(_sz.height, _sz.width, traits::Type<_Tp>::value) +{} + +template inline +Mat_<_Tp>::Mat_(Size _sz, const _Tp& value) + : Mat(_sz.height, _sz.width, traits::Type<_Tp>::value) +{ + *this = value; +} + +template inline +Mat_<_Tp>::Mat_(int _dims, const int* _sz) + : Mat(_dims, _sz, traits::Type<_Tp>::value) +{} + +template inline +Mat_<_Tp>::Mat_(int _dims, const int* _sz, const _Tp& _s) + : Mat(_dims, _sz, traits::Type<_Tp>::value, Scalar(_s)) +{} + +template inline +Mat_<_Tp>::Mat_(int _dims, const int* _sz, _Tp* _data, const size_t* _steps) + : Mat(_dims, _sz, traits::Type<_Tp>::value, _data, _steps) +{} + +template inline +Mat_<_Tp>::Mat_(const Mat_<_Tp>& m, const Range* ranges) + : Mat(m, ranges) +{} + +template inline +Mat_<_Tp>::Mat_(const Mat_<_Tp>& m, const std::vector& ranges) + : Mat(m, ranges) +{} + +template inline +Mat_<_Tp>::Mat_(const Mat& m) + : Mat() +{ + flags = (flags & ~CV_MAT_TYPE_MASK) + traits::Type<_Tp>::value; + *this = m; +} + +template inline +Mat_<_Tp>::Mat_(const Mat_& m) + : Mat(m) +{} + +template inline +Mat_<_Tp>::Mat_(int _rows, int _cols, _Tp* _data, size_t steps) + : Mat(_rows, _cols, traits::Type<_Tp>::value, _data, steps) +{} + +template inline +Mat_<_Tp>::Mat_(const Mat_& m, const Range& _rowRange, const Range& _colRange) + : Mat(m, _rowRange, _colRange) +{} + +template inline +Mat_<_Tp>::Mat_(const Mat_& m, const Rect& roi) + : Mat(m, roi) +{} + +template template inline +Mat_<_Tp>::Mat_(const Vec::channel_type, n>& vec, bool copyData) + : Mat(n / DataType<_Tp>::channels, 1, traits::Type<_Tp>::value, (void*)&vec) +{ + CV_Assert(n%DataType<_Tp>::channels == 0); + if( copyData ) + *this = clone(); +} + +template template inline +Mat_<_Tp>::Mat_(const Matx::channel_type, m, n>& M, bool copyData) + : Mat(m, n / DataType<_Tp>::channels, traits::Type<_Tp>::value, (void*)&M) +{ + CV_Assert(n % DataType<_Tp>::channels == 0); + if( copyData ) + *this = clone(); +} + +template inline +Mat_<_Tp>::Mat_(const Point_::channel_type>& pt, bool copyData) + : Mat(2 / DataType<_Tp>::channels, 1, traits::Type<_Tp>::value, (void*)&pt) +{ + CV_Assert(2 % DataType<_Tp>::channels == 0); + if( copyData ) + *this = clone(); +} + +template inline +Mat_<_Tp>::Mat_(const Point3_::channel_type>& pt, bool copyData) + : Mat(3 / DataType<_Tp>::channels, 1, traits::Type<_Tp>::value, (void*)&pt) +{ + CV_Assert(3 % DataType<_Tp>::channels == 0); + if( copyData ) + *this = clone(); +} + +template inline +Mat_<_Tp>::Mat_(const MatCommaInitializer_<_Tp>& commaInitializer) + : Mat(commaInitializer) +{} + +template inline +Mat_<_Tp>::Mat_(const std::vector<_Tp>& vec, bool copyData) + : Mat(vec, copyData) +{} + +template inline +Mat_<_Tp>::Mat_(std::initializer_list<_Tp> list) + : Mat(list) +{} + +template inline +Mat_<_Tp>::Mat_(const std::initializer_list sizes, std::initializer_list<_Tp> list) + : Mat(sizes, list) +{} + +template template inline +Mat_<_Tp>::Mat_(const std::array<_Tp, _Nm>& arr, bool copyData) + : Mat(arr, copyData) +{} + +template inline +Mat_<_Tp>& Mat_<_Tp>::operator = (const Mat& m) +{ + if (m.empty()) + { + release(); + return *this; + } + if( traits::Type<_Tp>::value == m.type() ) + { + Mat::operator = (m); + return *this; + } + if( traits::Depth<_Tp>::value == m.depth() ) + { + return (*this = m.reshape(DataType<_Tp>::channels, m.dims, 0)); + } + CV_Assert(DataType<_Tp>::channels == m.channels() || m.empty()); + m.convertTo(*this, type()); + return *this; +} + +template inline +Mat_<_Tp>& Mat_<_Tp>::operator = (const Mat_& m) +{ + Mat::operator=(m); + return *this; +} + +template inline +Mat_<_Tp>& Mat_<_Tp>::operator = (const _Tp& s) +{ + typedef typename DataType<_Tp>::vec_type VT; + Mat::operator=(Scalar((const VT&)s)); + return *this; +} + +template inline +void Mat_<_Tp>::create(int _rows, int _cols) +{ + Mat::create(_rows, _cols, traits::Type<_Tp>::value); +} + +template inline +void Mat_<_Tp>::create(Size _sz) +{ + Mat::create(_sz, traits::Type<_Tp>::value); +} + +template inline +void Mat_<_Tp>::create(int _dims, const int* _sz) +{ + Mat::create(_dims, _sz, traits::Type<_Tp>::value); +} + +template inline +void Mat_<_Tp>::release() +{ + Mat::release(); + flags = (flags & ~CV_MAT_TYPE_MASK) + traits::Type<_Tp>::value; +} + +template inline +Mat_<_Tp> Mat_<_Tp>::cross(const Mat_& m) const +{ + return Mat_<_Tp>(Mat::cross(m)); +} + +template template inline +Mat_<_Tp>::operator Mat_() const +{ + return Mat_(static_cast(*this)); +} + +template inline +Mat_<_Tp> Mat_<_Tp>::row(int y) const +{ + return Mat_(*this, Range(y, y+1), Range::all()); +} + +template inline +Mat_<_Tp> Mat_<_Tp>::col(int x) const +{ + return Mat_(*this, Range::all(), Range(x, x+1)); +} + +template inline +Mat_<_Tp> Mat_<_Tp>::diag(int d) const +{ + return Mat_(Mat::diag(d)); +} + +template inline +Mat_<_Tp> Mat_<_Tp>::clone() const +{ + return Mat_(Mat::clone()); +} + +template inline +size_t Mat_<_Tp>::elemSize() const +{ + CV_DbgAssert( Mat::elemSize() == sizeof(_Tp) ); + return sizeof(_Tp); +} + +template inline +size_t Mat_<_Tp>::elemSize1() const +{ + CV_DbgAssert( Mat::elemSize1() == sizeof(_Tp) / DataType<_Tp>::channels ); + return sizeof(_Tp) / DataType<_Tp>::channels; +} + +template inline +int Mat_<_Tp>::type() const +{ + CV_DbgAssert( Mat::type() == traits::Type<_Tp>::value ); + return traits::Type<_Tp>::value; +} + +template inline +int Mat_<_Tp>::depth() const +{ + CV_DbgAssert( Mat::depth() == traits::Depth<_Tp>::value ); + return traits::Depth<_Tp>::value; +} + +template inline +int Mat_<_Tp>::channels() const +{ + CV_DbgAssert( Mat::channels() == DataType<_Tp>::channels ); + return DataType<_Tp>::channels; +} + +template inline +size_t Mat_<_Tp>::stepT(int i) const +{ + return step.p[i] / elemSize(); +} + +template inline +size_t Mat_<_Tp>::step1(int i) const +{ + return step.p[i] / elemSize1(); +} + +template inline +Mat_<_Tp>& Mat_<_Tp>::adjustROI( int dtop, int dbottom, int dleft, int dright ) +{ + return (Mat_<_Tp>&)(Mat::adjustROI(dtop, dbottom, dleft, dright)); +} + +template inline +Mat_<_Tp> Mat_<_Tp>::operator()( const Range& _rowRange, const Range& _colRange ) const +{ + return Mat_<_Tp>(*this, _rowRange, _colRange); +} + +template inline +Mat_<_Tp> Mat_<_Tp>::operator()( const Rect& roi ) const +{ + return Mat_<_Tp>(*this, roi); +} + +template inline +Mat_<_Tp> Mat_<_Tp>::operator()( const Range* ranges ) const +{ + return Mat_<_Tp>(*this, ranges); +} + +template inline +Mat_<_Tp> Mat_<_Tp>::operator()(const std::vector& ranges) const +{ + return Mat_<_Tp>(*this, ranges); +} + +template inline +_Tp* Mat_<_Tp>::operator [](int y) +{ + CV_DbgAssert( 0 <= y && y < size.p[0] ); + return (_Tp*)(data + y*step.p[0]); +} + +template inline +const _Tp* Mat_<_Tp>::operator [](int y) const +{ + CV_DbgAssert( 0 <= y && y < size.p[0] ); + return (const _Tp*)(data + y*step.p[0]); +} + +template inline +_Tp& Mat_<_Tp>::operator ()(int i0, int i1) +{ + CV_DbgAssert(dims <= 2); + CV_DbgAssert(data); + CV_DbgAssert((unsigned)i0 < (unsigned)size.p[0]); + CV_DbgAssert((unsigned)i1 < (unsigned)size.p[1]); + CV_DbgAssert(type() == traits::Type<_Tp>::value); + return ((_Tp*)(data + step.p[0] * i0))[i1]; +} + +template inline +const _Tp& Mat_<_Tp>::operator ()(int i0, int i1) const +{ + CV_DbgAssert(dims <= 2); + CV_DbgAssert(data); + CV_DbgAssert((unsigned)i0 < (unsigned)size.p[0]); + CV_DbgAssert((unsigned)i1 < (unsigned)size.p[1]); + CV_DbgAssert(type() == traits::Type<_Tp>::value); + return ((const _Tp*)(data + step.p[0] * i0))[i1]; +} + +template inline +_Tp& Mat_<_Tp>::operator ()(Point pt) +{ + CV_DbgAssert(dims <= 2); + CV_DbgAssert(data); + CV_DbgAssert((unsigned)pt.y < (unsigned)size.p[0]); + CV_DbgAssert((unsigned)pt.x < (unsigned)size.p[1]); + CV_DbgAssert(type() == traits::Type<_Tp>::value); + return ((_Tp*)(data + step.p[0] * pt.y))[pt.x]; +} + +template inline +const _Tp& Mat_<_Tp>::operator ()(Point pt) const +{ + CV_DbgAssert(dims <= 2); + CV_DbgAssert(data); + CV_DbgAssert((unsigned)pt.y < (unsigned)size.p[0]); + CV_DbgAssert((unsigned)pt.x < (unsigned)size.p[1]); + CV_DbgAssert(type() == traits::Type<_Tp>::value); + return ((const _Tp*)(data + step.p[0] * pt.y))[pt.x]; +} + +template inline +_Tp& Mat_<_Tp>::operator ()(const int* idx) +{ + return Mat::at<_Tp>(idx); +} + +template inline +const _Tp& Mat_<_Tp>::operator ()(const int* idx) const +{ + return Mat::at<_Tp>(idx); +} + +template template inline +_Tp& Mat_<_Tp>::operator ()(const Vec& idx) +{ + return Mat::at<_Tp>(idx); +} + +template template inline +const _Tp& Mat_<_Tp>::operator ()(const Vec& idx) const +{ + return Mat::at<_Tp>(idx); +} + +template inline +_Tp& Mat_<_Tp>::operator ()(int i0) +{ + return this->at<_Tp>(i0); +} + +template inline +const _Tp& Mat_<_Tp>::operator ()(int i0) const +{ + return this->at<_Tp>(i0); +} + +template inline +_Tp& Mat_<_Tp>::operator ()(int i0, int i1, int i2) +{ + return this->at<_Tp>(i0, i1, i2); +} + +template inline +const _Tp& Mat_<_Tp>::operator ()(int i0, int i1, int i2) const +{ + return this->at<_Tp>(i0, i1, i2); +} + +template inline +Mat_<_Tp>::operator std::vector<_Tp>() const +{ + std::vector<_Tp> v; + copyTo(v); + return v; +} + +template template inline +Mat_<_Tp>::operator std::array<_Tp, _Nm>() const +{ + std::array<_Tp, _Nm> a; + copyTo(a); + return a; +} + +template template inline +Mat_<_Tp>::operator Vec::channel_type, n>() const +{ + CV_Assert(n % DataType<_Tp>::channels == 0); + +#if defined _MSC_VER + const Mat* pMat = (const Mat*)this; // workaround for MSVS <= 2012 compiler bugs (but GCC 4.6 dislikes this workaround) + return pMat->operator Vec::channel_type, n>(); +#else + return this->Mat::operator Vec::channel_type, n>(); +#endif +} + +template template inline +Mat_<_Tp>::operator Matx::channel_type, m, n>() const +{ + CV_Assert(n % DataType<_Tp>::channels == 0); + +#if defined _MSC_VER + const Mat* pMat = (const Mat*)this; // workaround for MSVS <= 2012 compiler bugs (but GCC 4.6 dislikes this workaround) + Matx::channel_type, m, n> res = pMat->operator Matx::channel_type, m, n>(); + return res; +#else + Matx::channel_type, m, n> res = this->Mat::operator Matx::channel_type, m, n>(); + return res; +#endif +} + +template inline +MatConstIterator_<_Tp> Mat_<_Tp>::begin() const +{ + return Mat::begin<_Tp>(); +} + +template inline +MatConstIterator_<_Tp> Mat_<_Tp>::end() const +{ + return Mat::end<_Tp>(); +} + +template inline +MatIterator_<_Tp> Mat_<_Tp>::begin() +{ + return Mat::begin<_Tp>(); +} + +template inline +MatIterator_<_Tp> Mat_<_Tp>::end() +{ + return Mat::end<_Tp>(); +} + +template template inline +void Mat_<_Tp>::forEach(const Functor& operation) { + Mat::forEach<_Tp, Functor>(operation); +} + +template template inline +void Mat_<_Tp>::forEach(const Functor& operation) const { + Mat::forEach<_Tp, Functor>(operation); +} + +template inline +Mat_<_Tp>::Mat_(Mat_&& m) + : Mat(std::move(m)) +{ +} + +template inline +Mat_<_Tp>& Mat_<_Tp>::operator = (Mat_&& m) +{ + Mat::operator = (std::move(m)); + return *this; +} + +template inline +Mat_<_Tp>::Mat_(Mat&& m) + : Mat() +{ + flags = (flags & ~CV_MAT_TYPE_MASK) + traits::Type<_Tp>::value; + *this = std::move(m); +} + +template inline +Mat_<_Tp>& Mat_<_Tp>::operator = (Mat&& m) +{ + if (m.empty()) + { + release(); + return *this; + } + if( traits::Type<_Tp>::value == m.type() ) + { + Mat::operator = ((Mat&&)m); + return *this; + } + if( traits::Depth<_Tp>::value == m.depth() ) + { + Mat::operator = ((Mat&&)m.reshape(DataType<_Tp>::channels, m.dims, 0)); + return *this; + } + CV_DbgAssert(DataType<_Tp>::channels == m.channels()); + m.convertTo(*this, type()); + return *this; +} + +template inline +Mat_<_Tp>::Mat_(MatExpr&& e) + : Mat() +{ + flags = (flags & ~CV_MAT_TYPE_MASK) + traits::Type<_Tp>::value; + *this = Mat(e); +} + + +///////////////////////////// SparseMat ///////////////////////////// + +inline +SparseMat SparseMat::clone() const +{ + SparseMat temp; + this->copyTo(temp); + return temp; +} + +inline +size_t SparseMat::elemSize() const +{ + return CV_ELEM_SIZE(flags); +} + +inline +size_t SparseMat::elemSize1() const +{ + return CV_ELEM_SIZE1(flags); +} + +inline +int SparseMat::type() const +{ + return CV_MAT_TYPE(flags); +} + +inline +int SparseMat::depth() const +{ + return CV_MAT_DEPTH(flags); +} + +inline +int SparseMat::channels() const +{ + return CV_MAT_CN(flags); +} + +inline +const int* SparseMat::size() const +{ + return hdr ? hdr->size : 0; +} + +inline +int SparseMat::size(int i) const +{ + if( hdr ) + { + CV_DbgAssert((unsigned)i < (unsigned)hdr->dims); + return hdr->size[i]; + } + return 0; +} + +inline +int SparseMat::dims() const +{ + return hdr ? hdr->dims : 0; +} + +inline +size_t SparseMat::nzcount() const +{ + return hdr ? hdr->nodeCount : 0; +} + +template inline +_Tp& SparseMat::ref(int i0, size_t* hashval) +{ + return *(_Tp*)((SparseMat*)this)->ptr(i0, true, hashval); +} + +template inline +_Tp& SparseMat::ref(int i0, int i1, size_t* hashval) +{ + return *(_Tp*)((SparseMat*)this)->ptr(i0, i1, true, hashval); +} + +template inline +_Tp& SparseMat::ref(int i0, int i1, int i2, size_t* hashval) +{ + return *(_Tp*)((SparseMat*)this)->ptr(i0, i1, i2, true, hashval); +} + +template inline +_Tp& SparseMat::ref(const int* idx, size_t* hashval) +{ + return *(_Tp*)((SparseMat*)this)->ptr(idx, true, hashval); +} + +template inline +_Tp SparseMat::value(int i0, size_t* hashval) const +{ + const _Tp* p = (const _Tp*)((SparseMat*)this)->ptr(i0, false, hashval); + return p ? *p : _Tp(); +} + +template inline +_Tp SparseMat::value(int i0, int i1, size_t* hashval) const +{ + const _Tp* p = (const _Tp*)((SparseMat*)this)->ptr(i0, i1, false, hashval); + return p ? *p : _Tp(); +} + +template inline +_Tp SparseMat::value(int i0, int i1, int i2, size_t* hashval) const +{ + const _Tp* p = (const _Tp*)((SparseMat*)this)->ptr(i0, i1, i2, false, hashval); + return p ? *p : _Tp(); +} + +template inline +_Tp SparseMat::value(const int* idx, size_t* hashval) const +{ + const _Tp* p = (const _Tp*)((SparseMat*)this)->ptr(idx, false, hashval); + return p ? *p : _Tp(); +} + +template inline +const _Tp* SparseMat::find(int i0, size_t* hashval) const +{ + return (const _Tp*)((SparseMat*)this)->ptr(i0, false, hashval); +} + +template inline +const _Tp* SparseMat::find(int i0, int i1, size_t* hashval) const +{ + return (const _Tp*)((SparseMat*)this)->ptr(i0, i1, false, hashval); +} + +template inline +const _Tp* SparseMat::find(int i0, int i1, int i2, size_t* hashval) const +{ + return (const _Tp*)((SparseMat*)this)->ptr(i0, i1, i2, false, hashval); +} + +template inline +const _Tp* SparseMat::find(const int* idx, size_t* hashval) const +{ + return (const _Tp*)((SparseMat*)this)->ptr(idx, false, hashval); +} + +template inline +_Tp& SparseMat::value(Node* n) +{ + return *(_Tp*)((uchar*)n + hdr->valueOffset); +} + +template inline +const _Tp& SparseMat::value(const Node* n) const +{ + return *(const _Tp*)((const uchar*)n + hdr->valueOffset); +} + +inline +SparseMat::Node* SparseMat::node(size_t nidx) +{ + return (Node*)(void*)&hdr->pool[nidx]; +} + +inline +const SparseMat::Node* SparseMat::node(size_t nidx) const +{ + return (const Node*)(const void*)&hdr->pool[nidx]; +} + +inline +SparseMatIterator SparseMat::begin() +{ + return SparseMatIterator(this); +} + +inline +SparseMatConstIterator SparseMat::begin() const +{ + return SparseMatConstIterator(this); +} + +inline +SparseMatIterator SparseMat::end() +{ + SparseMatIterator it(this); + it.seekEnd(); + return it; +} + +inline +SparseMatConstIterator SparseMat::end() const +{ + SparseMatConstIterator it(this); + it.seekEnd(); + return it; +} + +template inline +SparseMatIterator_<_Tp> SparseMat::begin() +{ + return SparseMatIterator_<_Tp>(this); +} + +template inline +SparseMatConstIterator_<_Tp> SparseMat::begin() const +{ + return SparseMatConstIterator_<_Tp>(this); +} + +template inline +SparseMatIterator_<_Tp> SparseMat::end() +{ + SparseMatIterator_<_Tp> it(this); + it.seekEnd(); + return it; +} + +template inline +SparseMatConstIterator_<_Tp> SparseMat::end() const +{ + SparseMatConstIterator_<_Tp> it(this); + it.seekEnd(); + return it; +} + + + +///////////////////////////// SparseMat_ //////////////////////////// + +template inline +SparseMat_<_Tp>::SparseMat_() +{ + flags = MAGIC_VAL + traits::Type<_Tp>::value; +} + +template inline +SparseMat_<_Tp>::SparseMat_(int _dims, const int* _sizes) + : SparseMat(_dims, _sizes, traits::Type<_Tp>::value) +{} + +template inline +SparseMat_<_Tp>::SparseMat_(const SparseMat& m) +{ + if( m.type() == traits::Type<_Tp>::value ) + *this = (const SparseMat_<_Tp>&)m; + else + m.convertTo(*this, traits::Type<_Tp>::value); +} + +template inline +SparseMat_<_Tp>::SparseMat_(const SparseMat_<_Tp>& m) +{ + this->flags = m.flags; + this->hdr = m.hdr; + if( this->hdr ) + CV_XADD(&this->hdr->refcount, 1); +} + +template inline +SparseMat_<_Tp>::SparseMat_(const Mat& m) +{ + SparseMat sm(m); + *this = sm; +} + +template inline +SparseMat_<_Tp>& SparseMat_<_Tp>::operator = (const SparseMat_<_Tp>& m) +{ + if( this != &m ) + { + if( m.hdr ) CV_XADD(&m.hdr->refcount, 1); + release(); + flags = m.flags; + hdr = m.hdr; + } + return *this; +} + +template inline +SparseMat_<_Tp>& SparseMat_<_Tp>::operator = (const SparseMat& m) +{ + if( m.type() == traits::Type<_Tp>::value ) + return (*this = (const SparseMat_<_Tp>&)m); + m.convertTo(*this, traits::Type<_Tp>::value); + return *this; +} + +template inline +SparseMat_<_Tp>& SparseMat_<_Tp>::operator = (const Mat& m) +{ + return (*this = SparseMat(m)); +} + +template inline +SparseMat_<_Tp> SparseMat_<_Tp>::clone() const +{ + SparseMat_<_Tp> m; + this->copyTo(m); + return m; +} + +template inline +void SparseMat_<_Tp>::create(int _dims, const int* _sizes) +{ + SparseMat::create(_dims, _sizes, traits::Type<_Tp>::value); +} + +template inline +int SparseMat_<_Tp>::type() const +{ + return traits::Type<_Tp>::value; +} + +template inline +int SparseMat_<_Tp>::depth() const +{ + return traits::Depth<_Tp>::value; +} + +template inline +int SparseMat_<_Tp>::channels() const +{ + return DataType<_Tp>::channels; +} + +template inline +_Tp& SparseMat_<_Tp>::ref(int i0, size_t* hashval) +{ + return SparseMat::ref<_Tp>(i0, hashval); +} + +template inline +_Tp SparseMat_<_Tp>::operator()(int i0, size_t* hashval) const +{ + return SparseMat::value<_Tp>(i0, hashval); +} + +template inline +_Tp& SparseMat_<_Tp>::ref(int i0, int i1, size_t* hashval) +{ + return SparseMat::ref<_Tp>(i0, i1, hashval); +} + +template inline +_Tp SparseMat_<_Tp>::operator()(int i0, int i1, size_t* hashval) const +{ + return SparseMat::value<_Tp>(i0, i1, hashval); +} + +template inline +_Tp& SparseMat_<_Tp>::ref(int i0, int i1, int i2, size_t* hashval) +{ + return SparseMat::ref<_Tp>(i0, i1, i2, hashval); +} + +template inline +_Tp SparseMat_<_Tp>::operator()(int i0, int i1, int i2, size_t* hashval) const +{ + return SparseMat::value<_Tp>(i0, i1, i2, hashval); +} + +template inline +_Tp& SparseMat_<_Tp>::ref(const int* idx, size_t* hashval) +{ + return SparseMat::ref<_Tp>(idx, hashval); +} + +template inline +_Tp SparseMat_<_Tp>::operator()(const int* idx, size_t* hashval) const +{ + return SparseMat::value<_Tp>(idx, hashval); +} + +template inline +SparseMatIterator_<_Tp> SparseMat_<_Tp>::begin() +{ + return SparseMatIterator_<_Tp>(this); +} + +template inline +SparseMatConstIterator_<_Tp> SparseMat_<_Tp>::begin() const +{ + return SparseMatConstIterator_<_Tp>(this); +} + +template inline +SparseMatIterator_<_Tp> SparseMat_<_Tp>::end() +{ + SparseMatIterator_<_Tp> it(this); + it.seekEnd(); + return it; +} + +template inline +SparseMatConstIterator_<_Tp> SparseMat_<_Tp>::end() const +{ + SparseMatConstIterator_<_Tp> it(this); + it.seekEnd(); + return it; +} + + + +////////////////////////// MatConstIterator ///////////////////////// + +inline +MatConstIterator::MatConstIterator() + : m(0), elemSize(0), ptr(0), sliceStart(0), sliceEnd(0) +{} + +inline +MatConstIterator::MatConstIterator(const Mat* _m) + : m(_m), elemSize(_m->elemSize()), ptr(0), sliceStart(0), sliceEnd(0) +{ + if( m && m->isContinuous() ) + { + CV_Assert(!m->empty()); + sliceStart = m->ptr(); + sliceEnd = sliceStart + m->total()*elemSize; + } + seek((const int*)0); +} + +inline +MatConstIterator::MatConstIterator(const Mat* _m, int _row, int _col) + : m(_m), elemSize(_m->elemSize()), ptr(0), sliceStart(0), sliceEnd(0) +{ + CV_Assert(m && m->dims <= 2); + if( m->isContinuous() ) + { + CV_Assert(!m->empty()); + sliceStart = m->ptr(); + sliceEnd = sliceStart + m->total()*elemSize; + } + int idx[] = {_row, _col}; + seek(idx); +} + +inline +MatConstIterator::MatConstIterator(const Mat* _m, Point _pt) + : m(_m), elemSize(_m->elemSize()), ptr(0), sliceStart(0), sliceEnd(0) +{ + CV_Assert(m && m->dims <= 2); + if( m->isContinuous() ) + { + CV_Assert(!m->empty()); + sliceStart = m->ptr(); + sliceEnd = sliceStart + m->total()*elemSize; + } + int idx[] = {_pt.y, _pt.x}; + seek(idx); +} + +inline +MatConstIterator::MatConstIterator(const MatConstIterator& it) + : m(it.m), elemSize(it.elemSize), ptr(it.ptr), sliceStart(it.sliceStart), sliceEnd(it.sliceEnd) +{} + +inline +MatConstIterator& MatConstIterator::operator = (const MatConstIterator& it ) +{ + m = it.m; elemSize = it.elemSize; ptr = it.ptr; + sliceStart = it.sliceStart; sliceEnd = it.sliceEnd; + return *this; +} + +inline +const uchar* MatConstIterator::operator *() const +{ + return ptr; +} + +inline MatConstIterator& MatConstIterator::operator += (ptrdiff_t ofs) +{ + if( !m || ofs == 0 ) + return *this; + ptrdiff_t ofsb = ofs*elemSize; + ptr += ofsb; + if( ptr < sliceStart || sliceEnd <= ptr ) + { + ptr -= ofsb; + seek(ofs, true); + } + return *this; +} + +inline +MatConstIterator& MatConstIterator::operator -= (ptrdiff_t ofs) +{ + return (*this += -ofs); +} + +inline +MatConstIterator& MatConstIterator::operator --() +{ + if( m && (ptr -= elemSize) < sliceStart ) + { + ptr += elemSize; + seek(-1, true); + } + return *this; +} + +inline +MatConstIterator MatConstIterator::operator --(int) +{ + MatConstIterator b = *this; + *this += -1; + return b; +} + +inline +MatConstIterator& MatConstIterator::operator ++() +{ + if( m && (ptr += elemSize) >= sliceEnd ) + { + ptr -= elemSize; + seek(1, true); + } + return *this; +} + +inline MatConstIterator MatConstIterator::operator ++(int) +{ + MatConstIterator b = *this; + *this += 1; + return b; +} + + +static inline +bool operator == (const MatConstIterator& a, const MatConstIterator& b) +{ + return a.m == b.m && a.ptr == b.ptr; +} + +static inline +bool operator != (const MatConstIterator& a, const MatConstIterator& b) +{ + return !(a == b); +} + +static inline +bool operator < (const MatConstIterator& a, const MatConstIterator& b) +{ + return a.ptr < b.ptr; +} + +static inline +bool operator > (const MatConstIterator& a, const MatConstIterator& b) +{ + return a.ptr > b.ptr; +} + +static inline +bool operator <= (const MatConstIterator& a, const MatConstIterator& b) +{ + return a.ptr <= b.ptr; +} + +static inline +bool operator >= (const MatConstIterator& a, const MatConstIterator& b) +{ + return a.ptr >= b.ptr; +} + +static inline +ptrdiff_t operator - (const MatConstIterator& b, const MatConstIterator& a) +{ + if( a.m != b.m ) + return ((size_t)(-1) >> 1); + if( a.sliceEnd == b.sliceEnd ) + return (b.ptr - a.ptr)/static_cast(b.elemSize); + + return b.lpos() - a.lpos(); +} + +static inline +MatConstIterator operator + (const MatConstIterator& a, ptrdiff_t ofs) +{ + MatConstIterator b = a; + return b += ofs; +} + +static inline +MatConstIterator operator + (ptrdiff_t ofs, const MatConstIterator& a) +{ + MatConstIterator b = a; + return b += ofs; +} + +static inline +MatConstIterator operator - (const MatConstIterator& a, ptrdiff_t ofs) +{ + MatConstIterator b = a; + return b += -ofs; +} + + +inline +const uchar* MatConstIterator::operator [](ptrdiff_t i) const +{ + return *(*this + i); +} + + + +///////////////////////// MatConstIterator_ ///////////////////////// + +template inline +MatConstIterator_<_Tp>::MatConstIterator_() +{} + +template inline +MatConstIterator_<_Tp>::MatConstIterator_(const Mat_<_Tp>* _m) + : MatConstIterator(_m) +{} + +template inline +MatConstIterator_<_Tp>::MatConstIterator_(const Mat_<_Tp>* _m, int _row, int _col) + : MatConstIterator(_m, _row, _col) +{} + +template inline +MatConstIterator_<_Tp>::MatConstIterator_(const Mat_<_Tp>* _m, Point _pt) + : MatConstIterator(_m, _pt) +{} + +template inline +MatConstIterator_<_Tp>::MatConstIterator_(const MatConstIterator_& it) + : MatConstIterator(it) +{} + +template inline +MatConstIterator_<_Tp>& MatConstIterator_<_Tp>::operator = (const MatConstIterator_& it ) +{ + MatConstIterator::operator = (it); + return *this; +} + +template inline +const _Tp& MatConstIterator_<_Tp>::operator *() const +{ + return *(_Tp*)(this->ptr); +} + +template inline +MatConstIterator_<_Tp>& MatConstIterator_<_Tp>::operator += (ptrdiff_t ofs) +{ + MatConstIterator::operator += (ofs); + return *this; +} + +template inline +MatConstIterator_<_Tp>& MatConstIterator_<_Tp>::operator -= (ptrdiff_t ofs) +{ + return (*this += -ofs); +} + +template inline +MatConstIterator_<_Tp>& MatConstIterator_<_Tp>::operator --() +{ + MatConstIterator::operator --(); + return *this; +} + +template inline +MatConstIterator_<_Tp> MatConstIterator_<_Tp>::operator --(int) +{ + MatConstIterator_ b = *this; + MatConstIterator::operator --(); + return b; +} + +template inline +MatConstIterator_<_Tp>& MatConstIterator_<_Tp>::operator ++() +{ + MatConstIterator::operator ++(); + return *this; +} + +template inline +MatConstIterator_<_Tp> MatConstIterator_<_Tp>::operator ++(int) +{ + MatConstIterator_ b = *this; + MatConstIterator::operator ++(); + return b; +} + + +template inline +Point MatConstIterator_<_Tp>::pos() const +{ + if( !m ) + return Point(); + CV_DbgAssert( m->dims <= 2 ); + if( m->isContinuous() ) + { + ptrdiff_t ofs = (const _Tp*)ptr - (const _Tp*)m->data; + int y = (int)(ofs / m->cols); + int x = (int)(ofs - (ptrdiff_t)y * m->cols); + return Point(x, y); + } + else + { + ptrdiff_t ofs = (uchar*)ptr - m->data; + int y = (int)(ofs / m->step); + int x = (int)((ofs - y * m->step)/sizeof(_Tp)); + return Point(x, y); + } +} + + +template static inline +bool operator == (const MatConstIterator_<_Tp>& a, const MatConstIterator_<_Tp>& b) +{ + return a.m == b.m && a.ptr == b.ptr; +} + +template static inline +bool operator != (const MatConstIterator_<_Tp>& a, const MatConstIterator_<_Tp>& b) +{ + return a.m != b.m || a.ptr != b.ptr; +} + +template static inline +MatConstIterator_<_Tp> operator + (const MatConstIterator_<_Tp>& a, ptrdiff_t ofs) +{ + MatConstIterator t = (const MatConstIterator&)a + ofs; + return (MatConstIterator_<_Tp>&)t; +} + +template static inline +MatConstIterator_<_Tp> operator + (ptrdiff_t ofs, const MatConstIterator_<_Tp>& a) +{ + MatConstIterator t = (const MatConstIterator&)a + ofs; + return (MatConstIterator_<_Tp>&)t; +} + +template static inline +MatConstIterator_<_Tp> operator - (const MatConstIterator_<_Tp>& a, ptrdiff_t ofs) +{ + MatConstIterator t = (const MatConstIterator&)a - ofs; + return (MatConstIterator_<_Tp>&)t; +} + +template inline +const _Tp& MatConstIterator_<_Tp>::operator [](ptrdiff_t i) const +{ + return *(_Tp*)MatConstIterator::operator [](i); +} + + + +//////////////////////////// MatIterator_ /////////////////////////// + +template inline +MatIterator_<_Tp>::MatIterator_() + : MatConstIterator_<_Tp>() +{} + +template inline +MatIterator_<_Tp>::MatIterator_(Mat_<_Tp>* _m) + : MatConstIterator_<_Tp>(_m) +{} + +template inline +MatIterator_<_Tp>::MatIterator_(Mat_<_Tp>* _m, int _row, int _col) + : MatConstIterator_<_Tp>(_m, _row, _col) +{} + +template inline +MatIterator_<_Tp>::MatIterator_(Mat_<_Tp>* _m, Point _pt) + : MatConstIterator_<_Tp>(_m, _pt) +{} + +template inline +MatIterator_<_Tp>::MatIterator_(Mat_<_Tp>* _m, const int* _idx) + : MatConstIterator_<_Tp>(_m, _idx) +{} + +template inline +MatIterator_<_Tp>::MatIterator_(const MatIterator_& it) + : MatConstIterator_<_Tp>(it) +{} + +template inline +MatIterator_<_Tp>& MatIterator_<_Tp>::operator = (const MatIterator_<_Tp>& it ) +{ + MatConstIterator::operator = (it); + return *this; +} + +template inline +_Tp& MatIterator_<_Tp>::operator *() const +{ + return *(_Tp*)(this->ptr); +} + +template inline +MatIterator_<_Tp>& MatIterator_<_Tp>::operator += (ptrdiff_t ofs) +{ + MatConstIterator::operator += (ofs); + return *this; +} + +template inline +MatIterator_<_Tp>& MatIterator_<_Tp>::operator -= (ptrdiff_t ofs) +{ + MatConstIterator::operator += (-ofs); + return *this; +} + +template inline +MatIterator_<_Tp>& MatIterator_<_Tp>::operator --() +{ + MatConstIterator::operator --(); + return *this; +} + +template inline +MatIterator_<_Tp> MatIterator_<_Tp>::operator --(int) +{ + MatIterator_ b = *this; + MatConstIterator::operator --(); + return b; +} + +template inline +MatIterator_<_Tp>& MatIterator_<_Tp>::operator ++() +{ + MatConstIterator::operator ++(); + return *this; +} + +template inline +MatIterator_<_Tp> MatIterator_<_Tp>::operator ++(int) +{ + MatIterator_ b = *this; + MatConstIterator::operator ++(); + return b; +} + +template inline +_Tp& MatIterator_<_Tp>::operator [](ptrdiff_t i) const +{ + return *(*this + i); +} + + +template static inline +bool operator == (const MatIterator_<_Tp>& a, const MatIterator_<_Tp>& b) +{ + return a.m == b.m && a.ptr == b.ptr; +} + +template static inline +bool operator != (const MatIterator_<_Tp>& a, const MatIterator_<_Tp>& b) +{ + return a.m != b.m || a.ptr != b.ptr; +} + +template static inline +MatIterator_<_Tp> operator + (const MatIterator_<_Tp>& a, ptrdiff_t ofs) +{ + MatConstIterator t = (const MatConstIterator&)a + ofs; + return (MatIterator_<_Tp>&)t; +} + +template static inline +MatIterator_<_Tp> operator + (ptrdiff_t ofs, const MatIterator_<_Tp>& a) +{ + MatConstIterator t = (const MatConstIterator&)a + ofs; + return (MatIterator_<_Tp>&)t; +} + +template static inline +MatIterator_<_Tp> operator - (const MatIterator_<_Tp>& a, ptrdiff_t ofs) +{ + MatConstIterator t = (const MatConstIterator&)a - ofs; + return (MatIterator_<_Tp>&)t; +} + + + +/////////////////////// SparseMatConstIterator ////////////////////// + +inline +SparseMatConstIterator::SparseMatConstIterator() + : m(0), hashidx(0), ptr(0) +{} + +inline +SparseMatConstIterator::SparseMatConstIterator(const SparseMatConstIterator& it) + : m(it.m), hashidx(it.hashidx), ptr(it.ptr) +{} + +inline SparseMatConstIterator& SparseMatConstIterator::operator = (const SparseMatConstIterator& it) +{ + if( this != &it ) + { + m = it.m; + hashidx = it.hashidx; + ptr = it.ptr; + } + return *this; +} + +template inline +const _Tp& SparseMatConstIterator::value() const +{ + return *(const _Tp*)ptr; +} + +inline +const SparseMat::Node* SparseMatConstIterator::node() const +{ + return (ptr && m && m->hdr) ? (const SparseMat::Node*)(const void*)(ptr - m->hdr->valueOffset) : 0; +} + +inline +SparseMatConstIterator SparseMatConstIterator::operator ++(int) +{ + SparseMatConstIterator it = *this; + ++*this; + return it; +} + +inline +void SparseMatConstIterator::seekEnd() +{ + if( m && m->hdr ) + { + hashidx = m->hdr->hashtab.size(); + ptr = 0; + } +} + + +static inline +bool operator == (const SparseMatConstIterator& it1, const SparseMatConstIterator& it2) +{ + return it1.m == it2.m && it1.ptr == it2.ptr; +} + +static inline +bool operator != (const SparseMatConstIterator& it1, const SparseMatConstIterator& it2) +{ + return !(it1 == it2); +} + + + +///////////////////////// SparseMatIterator ///////////////////////// + +inline +SparseMatIterator::SparseMatIterator() +{} + +inline +SparseMatIterator::SparseMatIterator(SparseMat* _m) + : SparseMatConstIterator(_m) +{} + +inline +SparseMatIterator::SparseMatIterator(const SparseMatIterator& it) + : SparseMatConstIterator(it) +{} + +inline +SparseMatIterator& SparseMatIterator::operator = (const SparseMatIterator& it) +{ + (SparseMatConstIterator&)*this = it; + return *this; +} + +template inline +_Tp& SparseMatIterator::value() const +{ + return *(_Tp*)ptr; +} + +inline +SparseMat::Node* SparseMatIterator::node() const +{ + return (SparseMat::Node*)SparseMatConstIterator::node(); +} + +inline +SparseMatIterator& SparseMatIterator::operator ++() +{ + SparseMatConstIterator::operator ++(); + return *this; +} + +inline +SparseMatIterator SparseMatIterator::operator ++(int) +{ + SparseMatIterator it = *this; + ++*this; + return it; +} + + + +////////////////////// SparseMatConstIterator_ ////////////////////// + +template inline +SparseMatConstIterator_<_Tp>::SparseMatConstIterator_() +{} + +template inline +SparseMatConstIterator_<_Tp>::SparseMatConstIterator_(const SparseMat_<_Tp>* _m) + : SparseMatConstIterator(_m) +{} + +template inline +SparseMatConstIterator_<_Tp>::SparseMatConstIterator_(const SparseMat* _m) + : SparseMatConstIterator(_m) +{ + CV_Assert( _m->type() == traits::Type<_Tp>::value ); +} + +template inline +SparseMatConstIterator_<_Tp>::SparseMatConstIterator_(const SparseMatConstIterator_<_Tp>& it) + : SparseMatConstIterator(it) +{} + +template inline +SparseMatConstIterator_<_Tp>& SparseMatConstIterator_<_Tp>::operator = (const SparseMatConstIterator_<_Tp>& it) +{ + return reinterpret_cast&> + (*reinterpret_cast(this) = + reinterpret_cast(it)); +} + +template inline +const _Tp& SparseMatConstIterator_<_Tp>::operator *() const +{ + return *(const _Tp*)this->ptr; +} + +template inline +SparseMatConstIterator_<_Tp>& SparseMatConstIterator_<_Tp>::operator ++() +{ + SparseMatConstIterator::operator ++(); + return *this; +} + +template inline +SparseMatConstIterator_<_Tp> SparseMatConstIterator_<_Tp>::operator ++(int) +{ + SparseMatConstIterator_<_Tp> it = *this; + SparseMatConstIterator::operator ++(); + return it; +} + + + +///////////////////////// SparseMatIterator_ //////////////////////// + +template inline +SparseMatIterator_<_Tp>::SparseMatIterator_() +{} + +template inline +SparseMatIterator_<_Tp>::SparseMatIterator_(SparseMat_<_Tp>* _m) + : SparseMatConstIterator_<_Tp>(_m) +{} + +template inline +SparseMatIterator_<_Tp>::SparseMatIterator_(SparseMat* _m) + : SparseMatConstIterator_<_Tp>(_m) +{} + +template inline +SparseMatIterator_<_Tp>::SparseMatIterator_(const SparseMatIterator_<_Tp>& it) + : SparseMatConstIterator_<_Tp>(it) +{} + +template inline +SparseMatIterator_<_Tp>& SparseMatIterator_<_Tp>::operator = (const SparseMatIterator_<_Tp>& it) +{ + return reinterpret_cast&> + (*reinterpret_cast(this) = + reinterpret_cast(it)); +} + +template inline +_Tp& SparseMatIterator_<_Tp>::operator *() const +{ + return *(_Tp*)this->ptr; +} + +template inline +SparseMatIterator_<_Tp>& SparseMatIterator_<_Tp>::operator ++() +{ + SparseMatConstIterator::operator ++(); + return *this; +} + +template inline +SparseMatIterator_<_Tp> SparseMatIterator_<_Tp>::operator ++(int) +{ + SparseMatIterator_<_Tp> it = *this; + SparseMatConstIterator::operator ++(); + return it; +} + + + +//////////////////////// MatCommaInitializer_ /////////////////////// + +template inline +MatCommaInitializer_<_Tp>::MatCommaInitializer_(Mat_<_Tp>* _m) + : it(_m) +{} + +template template inline +MatCommaInitializer_<_Tp>& MatCommaInitializer_<_Tp>::operator , (T2 v) +{ + CV_DbgAssert( this->it < ((const Mat_<_Tp>*)this->it.m)->end() ); + *this->it = _Tp(v); + ++this->it; + return *this; +} + +template inline +MatCommaInitializer_<_Tp>::operator Mat_<_Tp>() const +{ + CV_DbgAssert( this->it == ((const Mat_<_Tp>*)this->it.m)->end() ); + return Mat_<_Tp>(*this->it.m); +} + + +template static inline +MatCommaInitializer_<_Tp> operator << (const Mat_<_Tp>& m, T2 val) +{ + MatCommaInitializer_<_Tp> commaInitializer((Mat_<_Tp>*)&m); + return (commaInitializer, val); +} + + + +///////////////////////// Matrix Expressions //////////////////////// + +inline +Mat& Mat::operator = (const MatExpr& e) +{ + e.op->assign(e, *this); + return *this; +} + +template inline +Mat_<_Tp>::Mat_(const MatExpr& e) +{ + e.op->assign(e, *this, traits::Type<_Tp>::value); +} + +template inline +Mat_<_Tp>& Mat_<_Tp>::operator = (const MatExpr& e) +{ + e.op->assign(e, *this, traits::Type<_Tp>::value); + return *this; +} + +template inline +MatExpr Mat_<_Tp>::zeros(int rows, int cols) +{ + return Mat::zeros(rows, cols, traits::Type<_Tp>::value); +} + +template inline +MatExpr Mat_<_Tp>::zeros(Size sz) +{ + return Mat::zeros(sz, traits::Type<_Tp>::value); +} + +template inline +MatExpr Mat_<_Tp>::ones(int rows, int cols) +{ + return Mat::ones(rows, cols, traits::Type<_Tp>::value); +} + +template inline +MatExpr Mat_<_Tp>::ones(Size sz) +{ + return Mat::ones(sz, traits::Type<_Tp>::value); +} + +template inline +MatExpr Mat_<_Tp>::eye(int rows, int cols) +{ + return Mat::eye(rows, cols, traits::Type<_Tp>::value); +} + +template inline +MatExpr Mat_<_Tp>::eye(Size sz) +{ + return Mat::eye(sz, traits::Type<_Tp>::value); +} + +inline +MatExpr::MatExpr() + : op(0), flags(0), a(Mat()), b(Mat()), c(Mat()), alpha(0), beta(0), s() +{} + +inline +MatExpr::MatExpr(const MatOp* _op, int _flags, const Mat& _a, const Mat& _b, + const Mat& _c, double _alpha, double _beta, const Scalar& _s) + : op(_op), flags(_flags), a(_a), b(_b), c(_c), alpha(_alpha), beta(_beta), s(_s) +{} + +inline +MatExpr::operator Mat() const +{ + Mat m; + op->assign(*this, m); + return m; +} + +template inline +MatExpr::operator Mat_<_Tp>() const +{ + Mat_<_Tp> m; + op->assign(*this, m, traits::Type<_Tp>::value); + return m; +} + + +template static inline +MatExpr min(const Mat_<_Tp>& a, const Mat_<_Tp>& b) +{ + return cv::min((const Mat&)a, (const Mat&)b); +} + +template static inline +MatExpr min(const Mat_<_Tp>& a, double s) +{ + return cv::min((const Mat&)a, s); +} + +template static inline +MatExpr min(double s, const Mat_<_Tp>& a) +{ + return cv::min((const Mat&)a, s); +} + +template static inline +MatExpr max(const Mat_<_Tp>& a, const Mat_<_Tp>& b) +{ + return cv::max((const Mat&)a, (const Mat&)b); +} + +template static inline +MatExpr max(const Mat_<_Tp>& a, double s) +{ + return cv::max((const Mat&)a, s); +} + +template static inline +MatExpr max(double s, const Mat_<_Tp>& a) +{ + return cv::max((const Mat&)a, s); +} + +template static inline +MatExpr abs(const Mat_<_Tp>& m) +{ + return cv::abs((const Mat&)m); +} + + +static inline +Mat& operator += (Mat& a, const MatExpr& b) +{ + b.op->augAssignAdd(b, a); + return a; +} + +static inline +const Mat& operator += (const Mat& a, const MatExpr& b) +{ + b.op->augAssignAdd(b, (Mat&)a); + return a; +} + +template static inline +Mat_<_Tp>& operator += (Mat_<_Tp>& a, const MatExpr& b) +{ + b.op->augAssignAdd(b, a); + return a; +} + +template static inline +const Mat_<_Tp>& operator += (const Mat_<_Tp>& a, const MatExpr& b) +{ + b.op->augAssignAdd(b, (Mat&)a); + return a; +} + +static inline +Mat& operator -= (Mat& a, const MatExpr& b) +{ + b.op->augAssignSubtract(b, a); + return a; +} + +static inline +const Mat& operator -= (const Mat& a, const MatExpr& b) +{ + b.op->augAssignSubtract(b, (Mat&)a); + return a; +} + +template static inline +Mat_<_Tp>& operator -= (Mat_<_Tp>& a, const MatExpr& b) +{ + b.op->augAssignSubtract(b, a); + return a; +} + +template static inline +const Mat_<_Tp>& operator -= (const Mat_<_Tp>& a, const MatExpr& b) +{ + b.op->augAssignSubtract(b, (Mat&)a); + return a; +} + +static inline +Mat& operator *= (Mat& a, const MatExpr& b) +{ + b.op->augAssignMultiply(b, a); + return a; +} + +static inline +const Mat& operator *= (const Mat& a, const MatExpr& b) +{ + b.op->augAssignMultiply(b, (Mat&)a); + return a; +} + +template static inline +Mat_<_Tp>& operator *= (Mat_<_Tp>& a, const MatExpr& b) +{ + b.op->augAssignMultiply(b, a); + return a; +} + +template static inline +const Mat_<_Tp>& operator *= (const Mat_<_Tp>& a, const MatExpr& b) +{ + b.op->augAssignMultiply(b, (Mat&)a); + return a; +} + +static inline +Mat& operator /= (Mat& a, const MatExpr& b) +{ + b.op->augAssignDivide(b, a); + return a; +} + +static inline +const Mat& operator /= (const Mat& a, const MatExpr& b) +{ + b.op->augAssignDivide(b, (Mat&)a); + return a; +} + +template static inline +Mat_<_Tp>& operator /= (Mat_<_Tp>& a, const MatExpr& b) +{ + b.op->augAssignDivide(b, a); + return a; +} + +template static inline +const Mat_<_Tp>& operator /= (const Mat_<_Tp>& a, const MatExpr& b) +{ + b.op->augAssignDivide(b, (Mat&)a); + return a; +} + + +//////////////////////////////// UMat //////////////////////////////// + +template inline +UMat::UMat(const std::vector<_Tp>& vec, bool copyData) +: flags(MAGIC_VAL + traits::Type<_Tp>::value + CV_MAT_CONT_FLAG), dims(2), rows((int)vec.size()), +cols(1), allocator(0), usageFlags(USAGE_DEFAULT), u(0), offset(0), size(&rows) +{ + if(vec.empty()) + return; + if( !copyData ) + { + // !!!TODO!!! + CV_Error(Error::StsNotImplemented, ""); + } + else + Mat((int)vec.size(), 1, traits::Type<_Tp>::value, (uchar*)&vec[0]).copyTo(*this); +} + +inline +UMat UMat::row(int y) const +{ + return UMat(*this, Range(y, y + 1), Range::all()); +} + +inline +UMat UMat::col(int x) const +{ + return UMat(*this, Range::all(), Range(x, x + 1)); +} + +inline +UMat UMat::rowRange(int startrow, int endrow) const +{ + return UMat(*this, Range(startrow, endrow), Range::all()); +} + +inline +UMat UMat::rowRange(const Range& r) const +{ + return UMat(*this, r, Range::all()); +} + +inline +UMat UMat::colRange(int startcol, int endcol) const +{ + return UMat(*this, Range::all(), Range(startcol, endcol)); +} + +inline +UMat UMat::colRange(const Range& r) const +{ + return UMat(*this, Range::all(), r); +} + +inline +UMat UMat::operator()( Range _rowRange, Range _colRange ) const +{ + return UMat(*this, _rowRange, _colRange); +} + +inline +UMat UMat::operator()( const Rect& roi ) const +{ + return UMat(*this, roi); +} + +inline +UMat UMat::operator()(const Range* ranges) const +{ + return UMat(*this, ranges); +} + +inline +UMat UMat::operator()(const std::vector& ranges) const +{ + return UMat(*this, ranges); +} + +inline +bool UMat::isContinuous() const +{ + return (flags & CONTINUOUS_FLAG) != 0; +} + +inline +bool UMat::isSubmatrix() const +{ + return (flags & SUBMATRIX_FLAG) != 0; +} + +inline +size_t UMat::elemSize() const +{ + size_t res = dims > 0 ? step.p[dims - 1] : 0; + CV_DbgAssert(res != 0); + return res; +} + +inline +size_t UMat::elemSize1() const +{ + return CV_ELEM_SIZE1(flags); +} + +inline +int UMat::type() const +{ + return CV_MAT_TYPE(flags); +} + +inline +int UMat::depth() const +{ + return CV_MAT_DEPTH(flags); +} + +inline +int UMat::channels() const +{ + return CV_MAT_CN(flags); +} + +inline +size_t UMat::step1(int i) const +{ + return step.p[i] / elemSize1(); +} + + +inline bool UMatData::hostCopyObsolete() const { return (flags & HOST_COPY_OBSOLETE) != 0; } +inline bool UMatData::deviceCopyObsolete() const { return (flags & DEVICE_COPY_OBSOLETE) != 0; } +inline bool UMatData::deviceMemMapped() const { return (flags & DEVICE_MEM_MAPPED) != 0; } +inline bool UMatData::copyOnMap() const { return (flags & COPY_ON_MAP) != 0; } +inline bool UMatData::tempUMat() const { return (flags & TEMP_UMAT) != 0; } +inline bool UMatData::tempCopiedUMat() const { return (flags & TEMP_COPIED_UMAT) == TEMP_COPIED_UMAT; } + +inline void UMatData::markDeviceMemMapped(bool flag) +{ + if(flag) + flags |= DEVICE_MEM_MAPPED; + else + flags &= ~DEVICE_MEM_MAPPED; +} + +inline void UMatData::markHostCopyObsolete(bool flag) +{ + if(flag) + flags |= HOST_COPY_OBSOLETE; + else + flags &= ~HOST_COPY_OBSOLETE; +} +inline void UMatData::markDeviceCopyObsolete(bool flag) +{ + if(flag) + flags |= DEVICE_COPY_OBSOLETE; + else + flags &= ~DEVICE_COPY_OBSOLETE; +} + +//! @endcond + +static inline +void swap(MatExpr& a, MatExpr& b) { a.swap(b); } + +} //cv + +#ifdef _MSC_VER +#pragma warning( pop ) +#endif + +#ifdef CV_DISABLE_CLANG_ENUM_WARNINGS +#undef CV_DISABLE_CLANG_ENUM_WARNINGS +#pragma clang diagnostic pop +#endif + +#endif diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/matx.hpp b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/matx.hpp new file mode 100644 index 0000000..45d8a97 --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/matx.hpp @@ -0,0 +1,1508 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Copyright (C) 2013, OpenCV Foundation, all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef OPENCV_CORE_MATX_HPP +#define OPENCV_CORE_MATX_HPP + +#ifndef __cplusplus +# error matx.hpp header must be compiled as C++ +#endif + +#include "opencv2/core/cvdef.h" +#include "opencv2/core/base.hpp" +#include "opencv2/core/traits.hpp" +#include "opencv2/core/saturate.hpp" + +#include + +namespace cv +{ + +//! @addtogroup core_basic +//! @{ + +////////////////////////////// Small Matrix /////////////////////////// + +//! @cond IGNORED +// FIXIT Remove this (especially CV_EXPORTS modifier) +struct CV_EXPORTS Matx_AddOp { Matx_AddOp() {} Matx_AddOp(const Matx_AddOp&) {} }; +struct CV_EXPORTS Matx_SubOp { Matx_SubOp() {} Matx_SubOp(const Matx_SubOp&) {} }; +struct CV_EXPORTS Matx_ScaleOp { Matx_ScaleOp() {} Matx_ScaleOp(const Matx_ScaleOp&) {} }; +struct CV_EXPORTS Matx_MulOp { Matx_MulOp() {} Matx_MulOp(const Matx_MulOp&) {} }; +struct CV_EXPORTS Matx_DivOp { Matx_DivOp() {} Matx_DivOp(const Matx_DivOp&) {} }; +struct CV_EXPORTS Matx_MatMulOp { Matx_MatMulOp() {} Matx_MatMulOp(const Matx_MatMulOp&) {} }; +struct CV_EXPORTS Matx_TOp { Matx_TOp() {} Matx_TOp(const Matx_TOp&) {} }; +//! @endcond + +/** @brief Template class for small matrices whose type and size are known at compilation time + +If you need a more flexible type, use Mat . The elements of the matrix M are accessible using the +M(i,j) notation. Most of the common matrix operations (see also @ref MatrixExpressions ) are +available. To do an operation on Matx that is not implemented, you can easily convert the matrix to +Mat and backwards: +@code{.cpp} + Matx33f m(1, 2, 3, + 4, 5, 6, + 7, 8, 9); + cout << sum(Mat(m*m.t())) << endl; +@endcode +Except of the plain constructor which takes a list of elements, Matx can be initialized from a C-array: +@code{.cpp} + float values[] = { 1, 2, 3}; + Matx31f m(values); +@endcode +In case if C++11 features are available, std::initializer_list can be also used to initialize Matx: +@code{.cpp} + Matx31f m = { 1, 2, 3}; +@endcode + */ +template class Matx +{ +public: + enum { + rows = m, + cols = n, + channels = rows*cols, +#ifdef OPENCV_TRAITS_ENABLE_DEPRECATED + depth = traits::Type<_Tp>::value, + type = CV_MAKETYPE(depth, channels), +#endif + shortdim = (m < n ? m : n) + }; + + typedef _Tp value_type; + typedef Matx<_Tp, m, n> mat_type; + typedef Matx<_Tp, shortdim, 1> diag_type; + + //! default constructor + Matx(); + + explicit Matx(_Tp v0); //!< 1x1 matrix + Matx(_Tp v0, _Tp v1); //!< 1x2 or 2x1 matrix + Matx(_Tp v0, _Tp v1, _Tp v2); //!< 1x3 or 3x1 matrix + Matx(_Tp v0, _Tp v1, _Tp v2, _Tp v3); //!< 1x4, 2x2 or 4x1 matrix + Matx(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4); //!< 1x5 or 5x1 matrix + Matx(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4, _Tp v5); //!< 1x6, 2x3, 3x2 or 6x1 matrix + Matx(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4, _Tp v5, _Tp v6); //!< 1x7 or 7x1 matrix + Matx(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4, _Tp v5, _Tp v6, _Tp v7); //!< 1x8, 2x4, 4x2 or 8x1 matrix + Matx(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4, _Tp v5, _Tp v6, _Tp v7, _Tp v8); //!< 1x9, 3x3 or 9x1 matrix + Matx(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4, _Tp v5, _Tp v6, _Tp v7, _Tp v8, _Tp v9); //!< 1x10, 2x5 or 5x2 or 10x1 matrix + Matx(_Tp v0, _Tp v1, _Tp v2, _Tp v3, + _Tp v4, _Tp v5, _Tp v6, _Tp v7, + _Tp v8, _Tp v9, _Tp v10, _Tp v11); //!< 1x12, 2x6, 3x4, 4x3, 6x2 or 12x1 matrix + Matx(_Tp v0, _Tp v1, _Tp v2, _Tp v3, + _Tp v4, _Tp v5, _Tp v6, _Tp v7, + _Tp v8, _Tp v9, _Tp v10, _Tp v11, + _Tp v12, _Tp v13); //!< 1x14, 2x7, 7x2 or 14x1 matrix + Matx(_Tp v0, _Tp v1, _Tp v2, _Tp v3, + _Tp v4, _Tp v5, _Tp v6, _Tp v7, + _Tp v8, _Tp v9, _Tp v10, _Tp v11, + _Tp v12, _Tp v13, _Tp v14, _Tp v15); //!< 1x16, 4x4 or 16x1 matrix + explicit Matx(const _Tp* vals); //!< initialize from a plain array + + Matx(std::initializer_list<_Tp>); //!< initialize from an initializer list + + static Matx all(_Tp alpha); + static Matx zeros(); + static Matx ones(); + static Matx eye(); + static Matx diag(const diag_type& d); + /** @brief Generates uniformly distributed random numbers + @param a Range boundary. + @param b The other range boundary (boundaries don't have to be ordered, the lower boundary is inclusive, + the upper one is exclusive). + */ + static Matx randu(_Tp a, _Tp b); + /** @brief Generates normally distributed random numbers + @param a Mean value. + @param b Standard deviation. + */ + static Matx randn(_Tp a, _Tp b); + + //! dot product computed with the default precision + _Tp dot(const Matx<_Tp, m, n>& v) const; + + //! dot product computed in double-precision arithmetics + double ddot(const Matx<_Tp, m, n>& v) const; + + //! conversion to another data type + template operator Matx() const; + + //! change the matrix shape + template Matx<_Tp, m1, n1> reshape() const; + + //! extract part of the matrix + template Matx<_Tp, m1, n1> get_minor(int base_row, int base_col) const; + + //! extract the matrix row + Matx<_Tp, 1, n> row(int i) const; + + //! extract the matrix column + Matx<_Tp, m, 1> col(int i) const; + + //! extract the matrix diagonal + diag_type diag() const; + + //! transpose the matrix + Matx<_Tp, n, m> t() const; + + //! invert the matrix + Matx<_Tp, n, m> inv(int method=DECOMP_LU, bool *p_is_ok = NULL) const; + + //! solve linear system + template Matx<_Tp, n, l> solve(const Matx<_Tp, m, l>& rhs, int flags=DECOMP_LU) const; + Vec<_Tp, n> solve(const Vec<_Tp, m>& rhs, int method) const; + + //! multiply two matrices element-wise + Matx<_Tp, m, n> mul(const Matx<_Tp, m, n>& a) const; + + //! divide two matrices element-wise + Matx<_Tp, m, n> div(const Matx<_Tp, m, n>& a) const; + + //! element access + const _Tp& operator ()(int row, int col) const; + _Tp& operator ()(int row, int col); + + //! 1D element access + const _Tp& operator ()(int i) const; + _Tp& operator ()(int i); + + Matx(const Matx<_Tp, m, n>& a, const Matx<_Tp, m, n>& b, Matx_AddOp); + Matx(const Matx<_Tp, m, n>& a, const Matx<_Tp, m, n>& b, Matx_SubOp); + template Matx(const Matx<_Tp, m, n>& a, _T2 alpha, Matx_ScaleOp); + Matx(const Matx<_Tp, m, n>& a, const Matx<_Tp, m, n>& b, Matx_MulOp); + Matx(const Matx<_Tp, m, n>& a, const Matx<_Tp, m, n>& b, Matx_DivOp); + template Matx(const Matx<_Tp, m, l>& a, const Matx<_Tp, l, n>& b, Matx_MatMulOp); + Matx(const Matx<_Tp, n, m>& a, Matx_TOp); + + _Tp val[m*n]; //< matrix elements +}; + +typedef Matx Matx12f; +typedef Matx Matx12d; +typedef Matx Matx13f; +typedef Matx Matx13d; +typedef Matx Matx14f; +typedef Matx Matx14d; +typedef Matx Matx16f; +typedef Matx Matx16d; + +typedef Matx Matx21f; +typedef Matx Matx21d; +typedef Matx Matx31f; +typedef Matx Matx31d; +typedef Matx Matx41f; +typedef Matx Matx41d; +typedef Matx Matx61f; +typedef Matx Matx61d; + +typedef Matx Matx22f; +typedef Matx Matx22d; +typedef Matx Matx23f; +typedef Matx Matx23d; +typedef Matx Matx32f; +typedef Matx Matx32d; + +typedef Matx Matx33f; +typedef Matx Matx33d; + +typedef Matx Matx34f; +typedef Matx Matx34d; +typedef Matx Matx43f; +typedef Matx Matx43d; + +typedef Matx Matx44f; +typedef Matx Matx44d; +typedef Matx Matx66f; +typedef Matx Matx66d; + +/*! + traits +*/ +template class DataType< Matx<_Tp, m, n> > +{ +public: + typedef Matx<_Tp, m, n> value_type; + typedef Matx::work_type, m, n> work_type; + typedef _Tp channel_type; + typedef value_type vec_type; + + enum { generic_type = 0, + channels = m * n, + fmt = traits::SafeFmt::fmt + ((channels - 1) << 8) +#ifdef OPENCV_TRAITS_ENABLE_DEPRECATED + ,depth = DataType::depth + ,type = CV_MAKETYPE(depth, channels) +#endif + }; +}; + +namespace traits { +template +struct Depth< Matx<_Tp, m, n> > { enum { value = Depth<_Tp>::value }; }; +template +struct Type< Matx<_Tp, m, n> > { enum { value = CV_MAKETYPE(Depth<_Tp>::value, n*m) }; }; +} // namespace + + +/** @brief Comma-separated Matrix Initializer +*/ +template class MatxCommaInitializer +{ +public: + MatxCommaInitializer(Matx<_Tp, m, n>* _mtx); + template MatxCommaInitializer<_Tp, m, n>& operator , (T2 val); + Matx<_Tp, m, n> operator *() const; + + Matx<_Tp, m, n>* dst; + int idx; +}; + +/* + Utility methods +*/ +template static double determinant(const Matx<_Tp, m, m>& a); +template static double trace(const Matx<_Tp, m, n>& a); +template static double norm(const Matx<_Tp, m, n>& M); +template static double norm(const Matx<_Tp, m, n>& M, int normType); + + + +/////////////////////// Vec (used as element of multi-channel images ///////////////////// + +/** @brief Template class for short numerical vectors, a partial case of Matx + +This template class represents short numerical vectors (of 1, 2, 3, 4 ... elements) on which you +can perform basic arithmetical operations, access individual elements using [] operator etc. The +vectors are allocated on stack, as opposite to std::valarray, std::vector, cv::Mat etc., which +elements are dynamically allocated in the heap. + +The template takes 2 parameters: +@tparam _Tp element type +@tparam cn the number of elements + +In addition to the universal notation like Vec, you can use shorter aliases +for the most popular specialized variants of Vec, e.g. Vec3f ~ Vec. + +It is possible to convert Vec\ to/from Point_, Vec\ to/from Point3_ , and Vec\ +to CvScalar or Scalar_. Use operator[] to access the elements of Vec. + +All the expected vector operations are also implemented: +- v1 = v2 + v3 +- v1 = v2 - v3 +- v1 = v2 \* scale +- v1 = scale \* v2 +- v1 = -v2 +- v1 += v2 and other augmenting operations +- v1 == v2, v1 != v2 +- norm(v1) (euclidean norm) +The Vec class is commonly used to describe pixel types of multi-channel arrays. See Mat for details. +*/ +template class Vec : public Matx<_Tp, cn, 1> +{ +public: + typedef _Tp value_type; + enum { + channels = cn, +#ifdef OPENCV_TRAITS_ENABLE_DEPRECATED + depth = Matx<_Tp, cn, 1>::depth, + type = CV_MAKETYPE(depth, channels), +#endif + _dummy_enum_finalizer = 0 + }; + + //! default constructor + Vec(); + + Vec(_Tp v0); //!< 1-element vector constructor + Vec(_Tp v0, _Tp v1); //!< 2-element vector constructor + Vec(_Tp v0, _Tp v1, _Tp v2); //!< 3-element vector constructor + Vec(_Tp v0, _Tp v1, _Tp v2, _Tp v3); //!< 4-element vector constructor + Vec(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4); //!< 5-element vector constructor + Vec(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4, _Tp v5); //!< 6-element vector constructor + Vec(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4, _Tp v5, _Tp v6); //!< 7-element vector constructor + Vec(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4, _Tp v5, _Tp v6, _Tp v7); //!< 8-element vector constructor + Vec(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4, _Tp v5, _Tp v6, _Tp v7, _Tp v8); //!< 9-element vector constructor + Vec(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4, _Tp v5, _Tp v6, _Tp v7, _Tp v8, _Tp v9); //!< 10-element vector constructor + Vec(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4, _Tp v5, _Tp v6, _Tp v7, _Tp v8, _Tp v9, _Tp v10, _Tp v11, _Tp v12, _Tp v13); //!< 14-element vector constructor + explicit Vec(const _Tp* values); + + Vec(std::initializer_list<_Tp>); + + Vec(const Vec<_Tp, cn>& v); + + static Vec all(_Tp alpha); + + //! per-element multiplication + Vec mul(const Vec<_Tp, cn>& v) const; + + //! conjugation (makes sense for complex numbers and quaternions) + Vec conj() const; + + /*! + cross product of the two 3D vectors. + + For other dimensionalities the exception is raised + */ + Vec cross(const Vec& v) const; + //! conversion to another data type + template operator Vec() const; + + /*! element access */ + const _Tp& operator [](int i) const; + _Tp& operator[](int i); + const _Tp& operator ()(int i) const; + _Tp& operator ()(int i); + +#ifdef CV_CXX11 + Vec<_Tp, cn>& operator=(const Vec<_Tp, cn>& rhs) = default; +#endif + + Vec(const Matx<_Tp, cn, 1>& a, const Matx<_Tp, cn, 1>& b, Matx_AddOp); + Vec(const Matx<_Tp, cn, 1>& a, const Matx<_Tp, cn, 1>& b, Matx_SubOp); + template Vec(const Matx<_Tp, cn, 1>& a, _T2 alpha, Matx_ScaleOp); +}; + +/** @name Shorter aliases for the most popular specializations of Vec + @{ +*/ +typedef Vec Vec2b; +typedef Vec Vec3b; +typedef Vec Vec4b; + +typedef Vec Vec2s; +typedef Vec Vec3s; +typedef Vec Vec4s; + +typedef Vec Vec2w; +typedef Vec Vec3w; +typedef Vec Vec4w; + +typedef Vec Vec2i; +typedef Vec Vec3i; +typedef Vec Vec4i; +typedef Vec Vec6i; +typedef Vec Vec8i; + +typedef Vec Vec2f; +typedef Vec Vec3f; +typedef Vec Vec4f; +typedef Vec Vec6f; + +typedef Vec Vec2d; +typedef Vec Vec3d; +typedef Vec Vec4d; +typedef Vec Vec6d; +/** @} */ + +/*! + traits +*/ +template class DataType< Vec<_Tp, cn> > +{ +public: + typedef Vec<_Tp, cn> value_type; + typedef Vec::work_type, cn> work_type; + typedef _Tp channel_type; + typedef value_type vec_type; + + enum { generic_type = 0, + channels = cn, + fmt = DataType::fmt + ((channels - 1) << 8), +#ifdef OPENCV_TRAITS_ENABLE_DEPRECATED + depth = DataType::depth, + type = CV_MAKETYPE(depth, channels), +#endif + _dummy_enum_finalizer = 0 + }; +}; + +namespace traits { +template +struct Depth< Vec<_Tp, cn> > { enum { value = Depth<_Tp>::value }; }; +template +struct Type< Vec<_Tp, cn> > { enum { value = CV_MAKETYPE(Depth<_Tp>::value, cn) }; }; +} // namespace + + +/** @brief Comma-separated Vec Initializer +*/ +template class VecCommaInitializer : public MatxCommaInitializer<_Tp, m, 1> +{ +public: + VecCommaInitializer(Vec<_Tp, m>* _vec); + template VecCommaInitializer<_Tp, m>& operator , (T2 val); + Vec<_Tp, m> operator *() const; +}; + +template static Vec<_Tp, cn> normalize(const Vec<_Tp, cn>& v); + +//! @} core_basic + +//! @cond IGNORED + +///////////////////////////////////// helper classes ///////////////////////////////////// +namespace internal +{ + +template struct Matx_DetOp +{ + double operator ()(const Matx<_Tp, m, m>& a) const + { + Matx<_Tp, m, m> temp = a; + double p = LU(temp.val, m*sizeof(_Tp), m, 0, 0, 0); + if( p == 0 ) + return p; + for( int i = 0; i < m; i++ ) + p *= temp(i, i); + return p; + } +}; + +template struct Matx_DetOp<_Tp, 1> +{ + double operator ()(const Matx<_Tp, 1, 1>& a) const + { + return a(0,0); + } +}; + +template struct Matx_DetOp<_Tp, 2> +{ + double operator ()(const Matx<_Tp, 2, 2>& a) const + { + return a(0,0)*a(1,1) - a(0,1)*a(1,0); + } +}; + +template struct Matx_DetOp<_Tp, 3> +{ + double operator ()(const Matx<_Tp, 3, 3>& a) const + { + return a(0,0)*(a(1,1)*a(2,2) - a(2,1)*a(1,2)) - + a(0,1)*(a(1,0)*a(2,2) - a(2,0)*a(1,2)) + + a(0,2)*(a(1,0)*a(2,1) - a(2,0)*a(1,1)); + } +}; + +template Vec<_Tp, 2> inline conjugate(const Vec<_Tp, 2>& v) +{ + return Vec<_Tp, 2>(v[0], -v[1]); +} + +template Vec<_Tp, 4> inline conjugate(const Vec<_Tp, 4>& v) +{ + return Vec<_Tp, 4>(v[0], -v[1], -v[2], -v[3]); +} + +} // internal + + + +////////////////////////////////// Matx Implementation /////////////////////////////////// + +template inline +Matx<_Tp, m, n>::Matx() +{ + for(int i = 0; i < channels; i++) val[i] = _Tp(0); +} + +template inline +Matx<_Tp, m, n>::Matx(_Tp v0) +{ + val[0] = v0; + for(int i = 1; i < channels; i++) val[i] = _Tp(0); +} + +template inline +Matx<_Tp, m, n>::Matx(_Tp v0, _Tp v1) +{ + CV_StaticAssert(channels >= 2, "Matx should have at least 2 elements."); + val[0] = v0; val[1] = v1; + for(int i = 2; i < channels; i++) val[i] = _Tp(0); +} + +template inline +Matx<_Tp, m, n>::Matx(_Tp v0, _Tp v1, _Tp v2) +{ + CV_StaticAssert(channels >= 3, "Matx should have at least 3 elements."); + val[0] = v0; val[1] = v1; val[2] = v2; + for(int i = 3; i < channels; i++) val[i] = _Tp(0); +} + +template inline +Matx<_Tp, m, n>::Matx(_Tp v0, _Tp v1, _Tp v2, _Tp v3) +{ + CV_StaticAssert(channels >= 4, "Matx should have at least 4 elements."); + val[0] = v0; val[1] = v1; val[2] = v2; val[3] = v3; + for(int i = 4; i < channels; i++) val[i] = _Tp(0); +} + +template inline +Matx<_Tp, m, n>::Matx(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4) +{ + CV_StaticAssert(channels >= 5, "Matx should have at least 5 elements."); + val[0] = v0; val[1] = v1; val[2] = v2; val[3] = v3; val[4] = v4; + for(int i = 5; i < channels; i++) val[i] = _Tp(0); +} + +template inline +Matx<_Tp, m, n>::Matx(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4, _Tp v5) +{ + CV_StaticAssert(channels >= 6, "Matx should have at least 6 elements."); + val[0] = v0; val[1] = v1; val[2] = v2; val[3] = v3; + val[4] = v4; val[5] = v5; + for(int i = 6; i < channels; i++) val[i] = _Tp(0); +} + +template inline +Matx<_Tp, m, n>::Matx(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4, _Tp v5, _Tp v6) +{ + CV_StaticAssert(channels >= 7, "Matx should have at least 7 elements."); + val[0] = v0; val[1] = v1; val[2] = v2; val[3] = v3; + val[4] = v4; val[5] = v5; val[6] = v6; + for(int i = 7; i < channels; i++) val[i] = _Tp(0); +} + +template inline +Matx<_Tp, m, n>::Matx(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4, _Tp v5, _Tp v6, _Tp v7) +{ + CV_StaticAssert(channels >= 8, "Matx should have at least 8 elements."); + val[0] = v0; val[1] = v1; val[2] = v2; val[3] = v3; + val[4] = v4; val[5] = v5; val[6] = v6; val[7] = v7; + for(int i = 8; i < channels; i++) val[i] = _Tp(0); +} + +template inline +Matx<_Tp, m, n>::Matx(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4, _Tp v5, _Tp v6, _Tp v7, _Tp v8) +{ + CV_StaticAssert(channels >= 9, "Matx should have at least 9 elements."); + val[0] = v0; val[1] = v1; val[2] = v2; val[3] = v3; + val[4] = v4; val[5] = v5; val[6] = v6; val[7] = v7; + val[8] = v8; + for(int i = 9; i < channels; i++) val[i] = _Tp(0); +} + +template inline +Matx<_Tp, m, n>::Matx(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4, _Tp v5, _Tp v6, _Tp v7, _Tp v8, _Tp v9) +{ + CV_StaticAssert(channels >= 10, "Matx should have at least 10 elements."); + val[0] = v0; val[1] = v1; val[2] = v2; val[3] = v3; + val[4] = v4; val[5] = v5; val[6] = v6; val[7] = v7; + val[8] = v8; val[9] = v9; + for(int i = 10; i < channels; i++) val[i] = _Tp(0); +} + + +template inline +Matx<_Tp,m,n>::Matx(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4, _Tp v5, _Tp v6, _Tp v7, _Tp v8, _Tp v9, _Tp v10, _Tp v11) +{ + CV_StaticAssert(channels >= 12, "Matx should have at least 12 elements."); + val[0] = v0; val[1] = v1; val[2] = v2; val[3] = v3; + val[4] = v4; val[5] = v5; val[6] = v6; val[7] = v7; + val[8] = v8; val[9] = v9; val[10] = v10; val[11] = v11; + for(int i = 12; i < channels; i++) val[i] = _Tp(0); +} + +template inline +Matx<_Tp,m,n>::Matx(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4, _Tp v5, _Tp v6, _Tp v7, _Tp v8, _Tp v9, _Tp v10, _Tp v11, _Tp v12, _Tp v13) +{ + CV_StaticAssert(channels >= 14, "Matx should have at least 14 elements."); + val[0] = v0; val[1] = v1; val[2] = v2; val[3] = v3; + val[4] = v4; val[5] = v5; val[6] = v6; val[7] = v7; + val[8] = v8; val[9] = v9; val[10] = v10; val[11] = v11; + val[12] = v12; val[13] = v13; + for (int i = 14; i < channels; i++) val[i] = _Tp(0); +} + + +template inline +Matx<_Tp,m,n>::Matx(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4, _Tp v5, _Tp v6, _Tp v7, _Tp v8, _Tp v9, _Tp v10, _Tp v11, _Tp v12, _Tp v13, _Tp v14, _Tp v15) +{ + CV_StaticAssert(channels >= 16, "Matx should have at least 16 elements."); + val[0] = v0; val[1] = v1; val[2] = v2; val[3] = v3; + val[4] = v4; val[5] = v5; val[6] = v6; val[7] = v7; + val[8] = v8; val[9] = v9; val[10] = v10; val[11] = v11; + val[12] = v12; val[13] = v13; val[14] = v14; val[15] = v15; + for(int i = 16; i < channels; i++) val[i] = _Tp(0); +} + +template inline +Matx<_Tp, m, n>::Matx(const _Tp* values) +{ + for( int i = 0; i < channels; i++ ) val[i] = values[i]; +} + +template inline +Matx<_Tp, m, n>::Matx(std::initializer_list<_Tp> list) +{ + CV_DbgAssert(list.size() == channels); + int i = 0; + for(const auto& elem : list) + { + val[i++] = elem; + } +} + +template inline +Matx<_Tp, m, n> Matx<_Tp, m, n>::all(_Tp alpha) +{ + Matx<_Tp, m, n> M; + for( int i = 0; i < m*n; i++ ) M.val[i] = alpha; + return M; +} + +template inline +Matx<_Tp,m,n> Matx<_Tp,m,n>::zeros() +{ + return all(0); +} + +template inline +Matx<_Tp,m,n> Matx<_Tp,m,n>::ones() +{ + return all(1); +} + +template inline +Matx<_Tp,m,n> Matx<_Tp,m,n>::eye() +{ + Matx<_Tp,m,n> M; + for(int i = 0; i < shortdim; i++) + M(i,i) = 1; + return M; +} + +template inline +_Tp Matx<_Tp, m, n>::dot(const Matx<_Tp, m, n>& M) const +{ + _Tp s = 0; + for( int i = 0; i < channels; i++ ) s += val[i]*M.val[i]; + return s; +} + +template inline +double Matx<_Tp, m, n>::ddot(const Matx<_Tp, m, n>& M) const +{ + double s = 0; + for( int i = 0; i < channels; i++ ) s += (double)val[i]*M.val[i]; + return s; +} + +template inline +Matx<_Tp,m,n> Matx<_Tp,m,n>::diag(const typename Matx<_Tp,m,n>::diag_type& d) +{ + Matx<_Tp,m,n> M; + for(int i = 0; i < shortdim; i++) + M(i,i) = d(i, 0); + return M; +} + +template template +inline Matx<_Tp, m, n>::operator Matx() const +{ + Matx M; + for( int i = 0; i < m*n; i++ ) M.val[i] = saturate_cast(val[i]); + return M; +} + +template template inline +Matx<_Tp, m1, n1> Matx<_Tp, m, n>::reshape() const +{ + CV_StaticAssert(m1*n1 == m*n, "Input and destnarion matrices must have the same number of elements"); + return (const Matx<_Tp, m1, n1>&)*this; +} + +template +template inline +Matx<_Tp, m1, n1> Matx<_Tp, m, n>::get_minor(int base_row, int base_col) const +{ + CV_DbgAssert(0 <= base_row && base_row+m1 <= m && 0 <= base_col && base_col+n1 <= n); + Matx<_Tp, m1, n1> s; + for( int di = 0; di < m1; di++ ) + for( int dj = 0; dj < n1; dj++ ) + s(di, dj) = (*this)(base_row+di, base_col+dj); + return s; +} + +template inline +Matx<_Tp, 1, n> Matx<_Tp, m, n>::row(int i) const +{ + CV_DbgAssert((unsigned)i < (unsigned)m); + return Matx<_Tp, 1, n>(&val[i*n]); +} + +template inline +Matx<_Tp, m, 1> Matx<_Tp, m, n>::col(int j) const +{ + CV_DbgAssert((unsigned)j < (unsigned)n); + Matx<_Tp, m, 1> v; + for( int i = 0; i < m; i++ ) + v.val[i] = val[i*n + j]; + return v; +} + +template inline +typename Matx<_Tp, m, n>::diag_type Matx<_Tp, m, n>::diag() const +{ + diag_type d; + for( int i = 0; i < shortdim; i++ ) + d.val[i] = val[i*n + i]; + return d; +} + +template inline +const _Tp& Matx<_Tp, m, n>::operator()(int row_idx, int col_idx) const +{ + CV_DbgAssert( (unsigned)row_idx < (unsigned)m && (unsigned)col_idx < (unsigned)n ); + return this->val[row_idx*n + col_idx]; +} + +template inline +_Tp& Matx<_Tp, m, n>::operator ()(int row_idx, int col_idx) +{ + CV_DbgAssert( (unsigned)row_idx < (unsigned)m && (unsigned)col_idx < (unsigned)n ); + return val[row_idx*n + col_idx]; +} + +template inline +const _Tp& Matx<_Tp, m, n>::operator ()(int i) const +{ + CV_StaticAssert(m == 1 || n == 1, "Single index indexation requires matrix to be a column or a row"); + CV_DbgAssert( (unsigned)i < (unsigned)(m+n-1) ); + return val[i]; +} + +template inline +_Tp& Matx<_Tp, m, n>::operator ()(int i) +{ + CV_StaticAssert(m == 1 || n == 1, "Single index indexation requires matrix to be a column or a row"); + CV_DbgAssert( (unsigned)i < (unsigned)(m+n-1) ); + return val[i]; +} + +template inline +Matx<_Tp,m,n>::Matx(const Matx<_Tp, m, n>& a, const Matx<_Tp, m, n>& b, Matx_AddOp) +{ + for( int i = 0; i < channels; i++ ) + val[i] = saturate_cast<_Tp>(a.val[i] + b.val[i]); +} + +template inline +Matx<_Tp,m,n>::Matx(const Matx<_Tp, m, n>& a, const Matx<_Tp, m, n>& b, Matx_SubOp) +{ + for( int i = 0; i < channels; i++ ) + val[i] = saturate_cast<_Tp>(a.val[i] - b.val[i]); +} + +template template inline +Matx<_Tp,m,n>::Matx(const Matx<_Tp, m, n>& a, _T2 alpha, Matx_ScaleOp) +{ + for( int i = 0; i < channels; i++ ) + val[i] = saturate_cast<_Tp>(a.val[i] * alpha); +} + +template inline +Matx<_Tp,m,n>::Matx(const Matx<_Tp, m, n>& a, const Matx<_Tp, m, n>& b, Matx_MulOp) +{ + for( int i = 0; i < channels; i++ ) + val[i] = saturate_cast<_Tp>(a.val[i] * b.val[i]); +} + +template inline +Matx<_Tp,m,n>::Matx(const Matx<_Tp, m, n>& a, const Matx<_Tp, m, n>& b, Matx_DivOp) +{ + for( int i = 0; i < channels; i++ ) + val[i] = saturate_cast<_Tp>(a.val[i] / b.val[i]); +} + +template template inline +Matx<_Tp,m,n>::Matx(const Matx<_Tp, m, l>& a, const Matx<_Tp, l, n>& b, Matx_MatMulOp) +{ + for( int i = 0; i < m; i++ ) + for( int j = 0; j < n; j++ ) + { + _Tp s = 0; + for( int k = 0; k < l; k++ ) + s += a(i, k) * b(k, j); + val[i*n + j] = s; + } +} + +template inline +Matx<_Tp,m,n>::Matx(const Matx<_Tp, n, m>& a, Matx_TOp) +{ + for( int i = 0; i < m; i++ ) + for( int j = 0; j < n; j++ ) + val[i*n + j] = a(j, i); +} + +template inline +Matx<_Tp, m, n> Matx<_Tp, m, n>::mul(const Matx<_Tp, m, n>& a) const +{ + return Matx<_Tp, m, n>(*this, a, Matx_MulOp()); +} + +template inline +Matx<_Tp, m, n> Matx<_Tp, m, n>::div(const Matx<_Tp, m, n>& a) const +{ + return Matx<_Tp, m, n>(*this, a, Matx_DivOp()); +} + +template inline +Matx<_Tp, n, m> Matx<_Tp, m, n>::t() const +{ + return Matx<_Tp, n, m>(*this, Matx_TOp()); +} + +template inline +Vec<_Tp, n> Matx<_Tp, m, n>::solve(const Vec<_Tp, m>& rhs, int method) const +{ + Matx<_Tp, n, 1> x = solve((const Matx<_Tp, m, 1>&)(rhs), method); + return (Vec<_Tp, n>&)(x); +} + +template static inline +double determinant(const Matx<_Tp, m, m>& a) +{ + return cv::internal::Matx_DetOp<_Tp, m>()(a); +} + +template static inline +double trace(const Matx<_Tp, m, n>& a) +{ + _Tp s = 0; + for( int i = 0; i < std::min(m, n); i++ ) + s += a(i,i); + return s; +} + +template static inline +double norm(const Matx<_Tp, m, n>& M) +{ + return std::sqrt(normL2Sqr<_Tp, double>(M.val, m*n)); +} + +template static inline +double norm(const Matx<_Tp, m, n>& M, int normType) +{ + switch(normType) { + case NORM_INF: + return (double)normInf<_Tp, typename DataType<_Tp>::work_type>(M.val, m*n); + case NORM_L1: + return (double)normL1<_Tp, typename DataType<_Tp>::work_type>(M.val, m*n); + case NORM_L2SQR: + return (double)normL2Sqr<_Tp, typename DataType<_Tp>::work_type>(M.val, m*n); + default: + case NORM_L2: + return std::sqrt((double)normL2Sqr<_Tp, typename DataType<_Tp>::work_type>(M.val, m*n)); + } +} + + + +//////////////////////////////// matx comma initializer ////////////////////////////////// + +template static inline +MatxCommaInitializer<_Tp, m, n> operator << (const Matx<_Tp, m, n>& mtx, _T2 val) +{ + MatxCommaInitializer<_Tp, m, n> commaInitializer((Matx<_Tp, m, n>*)&mtx); + return (commaInitializer, val); +} + +template inline +MatxCommaInitializer<_Tp, m, n>::MatxCommaInitializer(Matx<_Tp, m, n>* _mtx) + : dst(_mtx), idx(0) +{} + +template template inline +MatxCommaInitializer<_Tp, m, n>& MatxCommaInitializer<_Tp, m, n>::operator , (_T2 value) +{ + CV_DbgAssert( idx < m*n ); + dst->val[idx++] = saturate_cast<_Tp>(value); + return *this; +} + +template inline +Matx<_Tp, m, n> MatxCommaInitializer<_Tp, m, n>::operator *() const +{ + CV_DbgAssert( idx == n*m ); + return *dst; +} + + + +/////////////////////////////////// Vec Implementation /////////////////////////////////// + +template inline +Vec<_Tp, cn>::Vec() {} + +template inline +Vec<_Tp, cn>::Vec(_Tp v0) + : Matx<_Tp, cn, 1>(v0) {} + +template inline +Vec<_Tp, cn>::Vec(_Tp v0, _Tp v1) + : Matx<_Tp, cn, 1>(v0, v1) {} + +template inline +Vec<_Tp, cn>::Vec(_Tp v0, _Tp v1, _Tp v2) + : Matx<_Tp, cn, 1>(v0, v1, v2) {} + +template inline +Vec<_Tp, cn>::Vec(_Tp v0, _Tp v1, _Tp v2, _Tp v3) + : Matx<_Tp, cn, 1>(v0, v1, v2, v3) {} + +template inline +Vec<_Tp, cn>::Vec(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4) + : Matx<_Tp, cn, 1>(v0, v1, v2, v3, v4) {} + +template inline +Vec<_Tp, cn>::Vec(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4, _Tp v5) + : Matx<_Tp, cn, 1>(v0, v1, v2, v3, v4, v5) {} + +template inline +Vec<_Tp, cn>::Vec(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4, _Tp v5, _Tp v6) + : Matx<_Tp, cn, 1>(v0, v1, v2, v3, v4, v5, v6) {} + +template inline +Vec<_Tp, cn>::Vec(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4, _Tp v5, _Tp v6, _Tp v7) + : Matx<_Tp, cn, 1>(v0, v1, v2, v3, v4, v5, v6, v7) {} + +template inline +Vec<_Tp, cn>::Vec(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4, _Tp v5, _Tp v6, _Tp v7, _Tp v8) + : Matx<_Tp, cn, 1>(v0, v1, v2, v3, v4, v5, v6, v7, v8) {} + +template inline +Vec<_Tp, cn>::Vec(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4, _Tp v5, _Tp v6, _Tp v7, _Tp v8, _Tp v9) + : Matx<_Tp, cn, 1>(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9) {} + +template inline +Vec<_Tp, cn>::Vec(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4, _Tp v5, _Tp v6, _Tp v7, _Tp v8, _Tp v9, _Tp v10, _Tp v11, _Tp v12, _Tp v13) + : Matx<_Tp, cn, 1>(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13) {} + +template inline +Vec<_Tp, cn>::Vec(const _Tp* values) + : Matx<_Tp, cn, 1>(values) {} + +template inline +Vec<_Tp, cn>::Vec(std::initializer_list<_Tp> list) + : Matx<_Tp, cn, 1>(list) {} + +template inline +Vec<_Tp, cn>::Vec(const Vec<_Tp, cn>& m) + : Matx<_Tp, cn, 1>(m.val) {} + +template inline +Vec<_Tp, cn>::Vec(const Matx<_Tp, cn, 1>& a, const Matx<_Tp, cn, 1>& b, Matx_AddOp op) + : Matx<_Tp, cn, 1>(a, b, op) {} + +template inline +Vec<_Tp, cn>::Vec(const Matx<_Tp, cn, 1>& a, const Matx<_Tp, cn, 1>& b, Matx_SubOp op) + : Matx<_Tp, cn, 1>(a, b, op) {} + +template template inline +Vec<_Tp, cn>::Vec(const Matx<_Tp, cn, 1>& a, _T2 alpha, Matx_ScaleOp op) + : Matx<_Tp, cn, 1>(a, alpha, op) {} + +template inline +Vec<_Tp, cn> Vec<_Tp, cn>::all(_Tp alpha) +{ + Vec v; + for( int i = 0; i < cn; i++ ) v.val[i] = alpha; + return v; +} + +template inline +Vec<_Tp, cn> Vec<_Tp, cn>::mul(const Vec<_Tp, cn>& v) const +{ + Vec<_Tp, cn> w; + for( int i = 0; i < cn; i++ ) w.val[i] = saturate_cast<_Tp>(this->val[i]*v.val[i]); + return w; +} + +template<> inline +Vec Vec::conj() const +{ + return cv::internal::conjugate(*this); +} + +template<> inline +Vec Vec::conj() const +{ + return cv::internal::conjugate(*this); +} + +template<> inline +Vec Vec::conj() const +{ + return cv::internal::conjugate(*this); +} + +template<> inline +Vec Vec::conj() const +{ + return cv::internal::conjugate(*this); +} + +template inline +Vec<_Tp, cn> Vec<_Tp, cn>::cross(const Vec<_Tp, cn>&) const +{ + CV_StaticAssert(cn == 3, "for arbitrary-size vector there is no cross-product defined"); + return Vec<_Tp, cn>(); +} + +template<> inline +Vec Vec::cross(const Vec& v) const +{ + return Vec(this->val[1]*v.val[2] - this->val[2]*v.val[1], + this->val[2]*v.val[0] - this->val[0]*v.val[2], + this->val[0]*v.val[1] - this->val[1]*v.val[0]); +} + +template<> inline +Vec Vec::cross(const Vec& v) const +{ + return Vec(this->val[1]*v.val[2] - this->val[2]*v.val[1], + this->val[2]*v.val[0] - this->val[0]*v.val[2], + this->val[0]*v.val[1] - this->val[1]*v.val[0]); +} + +template template inline +Vec<_Tp, cn>::operator Vec() const +{ + Vec v; + for( int i = 0; i < cn; i++ ) v.val[i] = saturate_cast(this->val[i]); + return v; +} + +template inline +const _Tp& Vec<_Tp, cn>::operator [](int i) const +{ + CV_DbgAssert( (unsigned)i < (unsigned)cn ); + return this->val[i]; +} + +template inline +_Tp& Vec<_Tp, cn>::operator [](int i) +{ + CV_DbgAssert( (unsigned)i < (unsigned)cn ); + return this->val[i]; +} + +template inline +const _Tp& Vec<_Tp, cn>::operator ()(int i) const +{ + CV_DbgAssert( (unsigned)i < (unsigned)cn ); + return this->val[i]; +} + +template inline +_Tp& Vec<_Tp, cn>::operator ()(int i) +{ + CV_DbgAssert( (unsigned)i < (unsigned)cn ); + return this->val[i]; +} + +template inline +Vec<_Tp, cn> normalize(const Vec<_Tp, cn>& v) +{ + double nv = norm(v); + return v * (nv ? 1./nv : 0.); +} + + + +//////////////////////////////// vec comma initializer ////////////////////////////////// + + +template static inline +VecCommaInitializer<_Tp, cn> operator << (const Vec<_Tp, cn>& vec, _T2 val) +{ + VecCommaInitializer<_Tp, cn> commaInitializer((Vec<_Tp, cn>*)&vec); + return (commaInitializer, val); +} + +template inline +VecCommaInitializer<_Tp, cn>::VecCommaInitializer(Vec<_Tp, cn>* _vec) + : MatxCommaInitializer<_Tp, cn, 1>(_vec) +{} + +template template inline +VecCommaInitializer<_Tp, cn>& VecCommaInitializer<_Tp, cn>::operator , (_T2 value) +{ + CV_DbgAssert( this->idx < cn ); + this->dst->val[this->idx++] = saturate_cast<_Tp>(value); + return *this; +} + +template inline +Vec<_Tp, cn> VecCommaInitializer<_Tp, cn>::operator *() const +{ + CV_DbgAssert( this->idx == cn ); + return *this->dst; +} + +//! @endcond + +///////////////////////////// Matx out-of-class operators //////////////////////////////// + +//! @relates cv::Matx +//! @{ + +template static inline +Matx<_Tp1, m, n>& operator += (Matx<_Tp1, m, n>& a, const Matx<_Tp2, m, n>& b) +{ + for( int i = 0; i < m*n; i++ ) + a.val[i] = saturate_cast<_Tp1>(a.val[i] + b.val[i]); + return a; +} + +template static inline +Matx<_Tp1, m, n>& operator -= (Matx<_Tp1, m, n>& a, const Matx<_Tp2, m, n>& b) +{ + for( int i = 0; i < m*n; i++ ) + a.val[i] = saturate_cast<_Tp1>(a.val[i] - b.val[i]); + return a; +} + +template static inline +Matx<_Tp, m, n> operator + (const Matx<_Tp, m, n>& a, const Matx<_Tp, m, n>& b) +{ + return Matx<_Tp, m, n>(a, b, Matx_AddOp()); +} + +template static inline +Matx<_Tp, m, n> operator - (const Matx<_Tp, m, n>& a, const Matx<_Tp, m, n>& b) +{ + return Matx<_Tp, m, n>(a, b, Matx_SubOp()); +} + +template static inline +Matx<_Tp, m, n>& operator *= (Matx<_Tp, m, n>& a, int alpha) +{ + for( int i = 0; i < m*n; i++ ) + a.val[i] = saturate_cast<_Tp>(a.val[i] * alpha); + return a; +} + +template static inline +Matx<_Tp, m, n>& operator *= (Matx<_Tp, m, n>& a, float alpha) +{ + for( int i = 0; i < m*n; i++ ) + a.val[i] = saturate_cast<_Tp>(a.val[i] * alpha); + return a; +} + +template static inline +Matx<_Tp, m, n>& operator *= (Matx<_Tp, m, n>& a, double alpha) +{ + for( int i = 0; i < m*n; i++ ) + a.val[i] = saturate_cast<_Tp>(a.val[i] * alpha); + return a; +} + +template static inline +Matx<_Tp, m, n> operator * (const Matx<_Tp, m, n>& a, int alpha) +{ + return Matx<_Tp, m, n>(a, alpha, Matx_ScaleOp()); +} + +template static inline +Matx<_Tp, m, n> operator * (const Matx<_Tp, m, n>& a, float alpha) +{ + return Matx<_Tp, m, n>(a, alpha, Matx_ScaleOp()); +} + +template static inline +Matx<_Tp, m, n> operator * (const Matx<_Tp, m, n>& a, double alpha) +{ + return Matx<_Tp, m, n>(a, alpha, Matx_ScaleOp()); +} + +template static inline +Matx<_Tp, m, n> operator * (int alpha, const Matx<_Tp, m, n>& a) +{ + return Matx<_Tp, m, n>(a, alpha, Matx_ScaleOp()); +} + +template static inline +Matx<_Tp, m, n> operator * (float alpha, const Matx<_Tp, m, n>& a) +{ + return Matx<_Tp, m, n>(a, alpha, Matx_ScaleOp()); +} + +template static inline +Matx<_Tp, m, n> operator * (double alpha, const Matx<_Tp, m, n>& a) +{ + return Matx<_Tp, m, n>(a, alpha, Matx_ScaleOp()); +} + +template static inline +Matx<_Tp, m, n>& operator /= (Matx<_Tp, m, n>& a, float alpha) +{ + for( int i = 0; i < m*n; i++ ) + a.val[i] = a.val[i] / alpha; + return a; +} + +template static inline +Matx<_Tp, m, n>& operator /= (Matx<_Tp, m, n>& a, double alpha) +{ + for( int i = 0; i < m*n; i++ ) + a.val[i] = a.val[i] / alpha; + return a; +} + +template static inline +Matx<_Tp, m, n> operator / (const Matx<_Tp, m, n>& a, float alpha) +{ + return Matx<_Tp, m, n>(a, 1.f/alpha, Matx_ScaleOp()); +} + +template static inline +Matx<_Tp, m, n> operator / (const Matx<_Tp, m, n>& a, double alpha) +{ + return Matx<_Tp, m, n>(a, 1./alpha, Matx_ScaleOp()); +} + +template static inline +Matx<_Tp, m, n> operator - (const Matx<_Tp, m, n>& a) +{ + return Matx<_Tp, m, n>(a, -1, Matx_ScaleOp()); +} + +template static inline +Matx<_Tp, m, n> operator * (const Matx<_Tp, m, l>& a, const Matx<_Tp, l, n>& b) +{ + return Matx<_Tp, m, n>(a, b, Matx_MatMulOp()); +} + +template static inline +Vec<_Tp, m> operator * (const Matx<_Tp, m, n>& a, const Vec<_Tp, n>& b) +{ + Matx<_Tp, m, 1> c(a, b, Matx_MatMulOp()); + return (const Vec<_Tp, m>&)(c); +} + +template static inline +bool operator == (const Matx<_Tp, m, n>& a, const Matx<_Tp, m, n>& b) +{ + for( int i = 0; i < m*n; i++ ) + if( a.val[i] != b.val[i] ) return false; + return true; +} + +template static inline +bool operator != (const Matx<_Tp, m, n>& a, const Matx<_Tp, m, n>& b) +{ + return !(a == b); +} + +//! @} + +////////////////////////////// Vec out-of-class operators //////////////////////////////// + +//! @relates cv::Vec +//! @{ + +template static inline +Vec<_Tp1, cn>& operator += (Vec<_Tp1, cn>& a, const Vec<_Tp2, cn>& b) +{ + for( int i = 0; i < cn; i++ ) + a.val[i] = saturate_cast<_Tp1>(a.val[i] + b.val[i]); + return a; +} + +template static inline +Vec<_Tp1, cn>& operator -= (Vec<_Tp1, cn>& a, const Vec<_Tp2, cn>& b) +{ + for( int i = 0; i < cn; i++ ) + a.val[i] = saturate_cast<_Tp1>(a.val[i] - b.val[i]); + return a; +} + +template static inline +Vec<_Tp, cn> operator + (const Vec<_Tp, cn>& a, const Vec<_Tp, cn>& b) +{ + return Vec<_Tp, cn>(a, b, Matx_AddOp()); +} + +template static inline +Vec<_Tp, cn> operator - (const Vec<_Tp, cn>& a, const Vec<_Tp, cn>& b) +{ + return Vec<_Tp, cn>(a, b, Matx_SubOp()); +} + +template static inline +Vec<_Tp, cn>& operator *= (Vec<_Tp, cn>& a, int alpha) +{ + for( int i = 0; i < cn; i++ ) + a[i] = saturate_cast<_Tp>(a[i]*alpha); + return a; +} + +template static inline +Vec<_Tp, cn>& operator *= (Vec<_Tp, cn>& a, float alpha) +{ + for( int i = 0; i < cn; i++ ) + a[i] = saturate_cast<_Tp>(a[i]*alpha); + return a; +} + +template static inline +Vec<_Tp, cn>& operator *= (Vec<_Tp, cn>& a, double alpha) +{ + for( int i = 0; i < cn; i++ ) + a[i] = saturate_cast<_Tp>(a[i]*alpha); + return a; +} + +template static inline +Vec<_Tp, cn>& operator /= (Vec<_Tp, cn>& a, int alpha) +{ + double ialpha = 1./alpha; + for( int i = 0; i < cn; i++ ) + a[i] = saturate_cast<_Tp>(a[i]*ialpha); + return a; +} + +template static inline +Vec<_Tp, cn>& operator /= (Vec<_Tp, cn>& a, float alpha) +{ + float ialpha = 1.f/alpha; + for( int i = 0; i < cn; i++ ) + a[i] = saturate_cast<_Tp>(a[i]*ialpha); + return a; +} + +template static inline +Vec<_Tp, cn>& operator /= (Vec<_Tp, cn>& a, double alpha) +{ + double ialpha = 1./alpha; + for( int i = 0; i < cn; i++ ) + a[i] = saturate_cast<_Tp>(a[i]*ialpha); + return a; +} + +template static inline +Vec<_Tp, cn> operator * (const Vec<_Tp, cn>& a, int alpha) +{ + return Vec<_Tp, cn>(a, alpha, Matx_ScaleOp()); +} + +template static inline +Vec<_Tp, cn> operator * (int alpha, const Vec<_Tp, cn>& a) +{ + return Vec<_Tp, cn>(a, alpha, Matx_ScaleOp()); +} + +template static inline +Vec<_Tp, cn> operator * (const Vec<_Tp, cn>& a, float alpha) +{ + return Vec<_Tp, cn>(a, alpha, Matx_ScaleOp()); +} + +template static inline +Vec<_Tp, cn> operator * (float alpha, const Vec<_Tp, cn>& a) +{ + return Vec<_Tp, cn>(a, alpha, Matx_ScaleOp()); +} + +template static inline +Vec<_Tp, cn> operator * (const Vec<_Tp, cn>& a, double alpha) +{ + return Vec<_Tp, cn>(a, alpha, Matx_ScaleOp()); +} + +template static inline +Vec<_Tp, cn> operator * (double alpha, const Vec<_Tp, cn>& a) +{ + return Vec<_Tp, cn>(a, alpha, Matx_ScaleOp()); +} + +template static inline +Vec<_Tp, cn> operator / (const Vec<_Tp, cn>& a, int alpha) +{ + return Vec<_Tp, cn>(a, 1./alpha, Matx_ScaleOp()); +} + +template static inline +Vec<_Tp, cn> operator / (const Vec<_Tp, cn>& a, float alpha) +{ + return Vec<_Tp, cn>(a, 1.f/alpha, Matx_ScaleOp()); +} + +template static inline +Vec<_Tp, cn> operator / (const Vec<_Tp, cn>& a, double alpha) +{ + return Vec<_Tp, cn>(a, 1./alpha, Matx_ScaleOp()); +} + +template static inline +Vec<_Tp, cn> operator - (const Vec<_Tp, cn>& a) +{ + Vec<_Tp,cn> t; + for( int i = 0; i < cn; i++ ) t.val[i] = saturate_cast<_Tp>(-a.val[i]); + return t; +} + +template inline Vec<_Tp, 4> operator * (const Vec<_Tp, 4>& v1, const Vec<_Tp, 4>& v2) +{ + return Vec<_Tp, 4>(saturate_cast<_Tp>(v1[0]*v2[0] - v1[1]*v2[1] - v1[2]*v2[2] - v1[3]*v2[3]), + saturate_cast<_Tp>(v1[0]*v2[1] + v1[1]*v2[0] + v1[2]*v2[3] - v1[3]*v2[2]), + saturate_cast<_Tp>(v1[0]*v2[2] - v1[1]*v2[3] + v1[2]*v2[0] + v1[3]*v2[1]), + saturate_cast<_Tp>(v1[0]*v2[3] + v1[1]*v2[2] - v1[2]*v2[1] + v1[3]*v2[0])); +} + +template inline Vec<_Tp, 4>& operator *= (Vec<_Tp, 4>& v1, const Vec<_Tp, 4>& v2) +{ + v1 = v1 * v2; + return v1; +} + +//! @} + +} // cv + +#endif // OPENCV_CORE_MATX_HPP diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/neon_utils.hpp b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/neon_utils.hpp new file mode 100644 index 0000000..573ba99 --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/neon_utils.hpp @@ -0,0 +1,128 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2015, Itseez Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef OPENCV_HAL_NEON_UTILS_HPP +#define OPENCV_HAL_NEON_UTILS_HPP + +#include "opencv2/core/cvdef.h" + +//! @addtogroup core_utils_neon +//! @{ + +#if CV_NEON + +inline int32x2_t cv_vrnd_s32_f32(float32x2_t v) +{ + static int32x2_t v_sign = vdup_n_s32(1 << 31), + v_05 = vreinterpret_s32_f32(vdup_n_f32(0.5f)); + + int32x2_t v_addition = vorr_s32(v_05, vand_s32(v_sign, vreinterpret_s32_f32(v))); + return vcvt_s32_f32(vadd_f32(v, vreinterpret_f32_s32(v_addition))); +} + +inline int32x4_t cv_vrndq_s32_f32(float32x4_t v) +{ + static int32x4_t v_sign = vdupq_n_s32(1 << 31), + v_05 = vreinterpretq_s32_f32(vdupq_n_f32(0.5f)); + + int32x4_t v_addition = vorrq_s32(v_05, vandq_s32(v_sign, vreinterpretq_s32_f32(v))); + return vcvtq_s32_f32(vaddq_f32(v, vreinterpretq_f32_s32(v_addition))); +} + +inline uint32x2_t cv_vrnd_u32_f32(float32x2_t v) +{ + static float32x2_t v_05 = vdup_n_f32(0.5f); + return vcvt_u32_f32(vadd_f32(v, v_05)); +} + +inline uint32x4_t cv_vrndq_u32_f32(float32x4_t v) +{ + static float32x4_t v_05 = vdupq_n_f32(0.5f); + return vcvtq_u32_f32(vaddq_f32(v, v_05)); +} + +inline float32x4_t cv_vrecpq_f32(float32x4_t val) +{ + float32x4_t reciprocal = vrecpeq_f32(val); + reciprocal = vmulq_f32(vrecpsq_f32(val, reciprocal), reciprocal); + reciprocal = vmulq_f32(vrecpsq_f32(val, reciprocal), reciprocal); + return reciprocal; +} + +inline float32x2_t cv_vrecp_f32(float32x2_t val) +{ + float32x2_t reciprocal = vrecpe_f32(val); + reciprocal = vmul_f32(vrecps_f32(val, reciprocal), reciprocal); + reciprocal = vmul_f32(vrecps_f32(val, reciprocal), reciprocal); + return reciprocal; +} + +inline float32x4_t cv_vrsqrtq_f32(float32x4_t val) +{ + float32x4_t e = vrsqrteq_f32(val); + e = vmulq_f32(vrsqrtsq_f32(vmulq_f32(e, e), val), e); + e = vmulq_f32(vrsqrtsq_f32(vmulq_f32(e, e), val), e); + return e; +} + +inline float32x2_t cv_vrsqrt_f32(float32x2_t val) +{ + float32x2_t e = vrsqrte_f32(val); + e = vmul_f32(vrsqrts_f32(vmul_f32(e, e), val), e); + e = vmul_f32(vrsqrts_f32(vmul_f32(e, e), val), e); + return e; +} + +inline float32x4_t cv_vsqrtq_f32(float32x4_t val) +{ + return cv_vrecpq_f32(cv_vrsqrtq_f32(val)); +} + +inline float32x2_t cv_vsqrt_f32(float32x2_t val) +{ + return cv_vrecp_f32(cv_vrsqrt_f32(val)); +} + +#endif + +//! @} + +#endif // OPENCV_HAL_NEON_UTILS_HPP diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/ocl.hpp b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/ocl.hpp new file mode 100644 index 0000000..66d7a20 --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/ocl.hpp @@ -0,0 +1,857 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2013, OpenCV Foundation, all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the OpenCV Foundation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef OPENCV_OPENCL_HPP +#define OPENCV_OPENCL_HPP + +#include "opencv2/core.hpp" + +namespace cv { namespace ocl { + +//! @addtogroup core_opencl +//! @{ + +CV_EXPORTS_W bool haveOpenCL(); +CV_EXPORTS_W bool useOpenCL(); +CV_EXPORTS_W bool haveAmdBlas(); +CV_EXPORTS_W bool haveAmdFft(); +CV_EXPORTS_W void setUseOpenCL(bool flag); +CV_EXPORTS_W void finish(); + +CV_EXPORTS bool haveSVM(); + +class CV_EXPORTS Context; +class CV_EXPORTS_W_SIMPLE Device; +class CV_EXPORTS Kernel; +class CV_EXPORTS Program; +class CV_EXPORTS ProgramSource; +class CV_EXPORTS Queue; +class CV_EXPORTS PlatformInfo; +class CV_EXPORTS Image2D; + +class CV_EXPORTS_W_SIMPLE Device +{ +public: + CV_WRAP Device(); + explicit Device(void* d); + Device(const Device& d); + Device& operator = (const Device& d); + CV_WRAP ~Device(); + + void set(void* d); + + enum + { + TYPE_DEFAULT = (1 << 0), + TYPE_CPU = (1 << 1), + TYPE_GPU = (1 << 2), + TYPE_ACCELERATOR = (1 << 3), + TYPE_DGPU = TYPE_GPU + (1 << 16), + TYPE_IGPU = TYPE_GPU + (1 << 17), + TYPE_ALL = 0xFFFFFFFF + }; + + CV_WRAP String name() const; + CV_WRAP String extensions() const; + CV_WRAP bool isExtensionSupported(const String& extensionName) const; + CV_WRAP String version() const; + CV_WRAP String vendorName() const; + CV_WRAP String OpenCL_C_Version() const; + CV_WRAP String OpenCLVersion() const; + CV_WRAP int deviceVersionMajor() const; + CV_WRAP int deviceVersionMinor() const; + CV_WRAP String driverVersion() const; + void* ptr() const; + + CV_WRAP int type() const; + + CV_WRAP int addressBits() const; + CV_WRAP bool available() const; + CV_WRAP bool compilerAvailable() const; + CV_WRAP bool linkerAvailable() const; + + enum + { + FP_DENORM=(1 << 0), + FP_INF_NAN=(1 << 1), + FP_ROUND_TO_NEAREST=(1 << 2), + FP_ROUND_TO_ZERO=(1 << 3), + FP_ROUND_TO_INF=(1 << 4), + FP_FMA=(1 << 5), + FP_SOFT_FLOAT=(1 << 6), + FP_CORRECTLY_ROUNDED_DIVIDE_SQRT=(1 << 7) + }; + CV_WRAP int doubleFPConfig() const; + CV_WRAP int singleFPConfig() const; + CV_WRAP int halfFPConfig() const; + + CV_WRAP bool endianLittle() const; + CV_WRAP bool errorCorrectionSupport() const; + + enum + { + EXEC_KERNEL=(1 << 0), + EXEC_NATIVE_KERNEL=(1 << 1) + }; + CV_WRAP int executionCapabilities() const; + + CV_WRAP size_t globalMemCacheSize() const; + + enum + { + NO_CACHE=0, + READ_ONLY_CACHE=1, + READ_WRITE_CACHE=2 + }; + CV_WRAP int globalMemCacheType() const; + CV_WRAP int globalMemCacheLineSize() const; + CV_WRAP size_t globalMemSize() const; + + CV_WRAP size_t localMemSize() const; + enum + { + NO_LOCAL_MEM=0, + LOCAL_IS_LOCAL=1, + LOCAL_IS_GLOBAL=2 + }; + CV_WRAP int localMemType() const; + CV_WRAP bool hostUnifiedMemory() const; + + CV_WRAP bool imageSupport() const; + + CV_WRAP bool imageFromBufferSupport() const; + uint imagePitchAlignment() const; + uint imageBaseAddressAlignment() const; + + /// deprecated, use isExtensionSupported() method (probably with "cl_khr_subgroups" value) + CV_WRAP bool intelSubgroupsSupport() const; + + CV_WRAP size_t image2DMaxWidth() const; + CV_WRAP size_t image2DMaxHeight() const; + + CV_WRAP size_t image3DMaxWidth() const; + CV_WRAP size_t image3DMaxHeight() const; + CV_WRAP size_t image3DMaxDepth() const; + + CV_WRAP size_t imageMaxBufferSize() const; + CV_WRAP size_t imageMaxArraySize() const; + + enum + { + UNKNOWN_VENDOR=0, + VENDOR_AMD=1, + VENDOR_INTEL=2, + VENDOR_NVIDIA=3 + }; + CV_WRAP int vendorID() const; + // FIXIT + // dev.isAMD() doesn't work for OpenCL CPU devices from AMD OpenCL platform. + // This method should use platform name instead of vendor name. + // After fix restore code in arithm.cpp: ocl_compare() + CV_WRAP inline bool isAMD() const { return vendorID() == VENDOR_AMD; } + CV_WRAP inline bool isIntel() const { return vendorID() == VENDOR_INTEL; } + CV_WRAP inline bool isNVidia() const { return vendorID() == VENDOR_NVIDIA; } + + CV_WRAP int maxClockFrequency() const; + CV_WRAP int maxComputeUnits() const; + CV_WRAP int maxConstantArgs() const; + CV_WRAP size_t maxConstantBufferSize() const; + + CV_WRAP size_t maxMemAllocSize() const; + CV_WRAP size_t maxParameterSize() const; + + CV_WRAP int maxReadImageArgs() const; + CV_WRAP int maxWriteImageArgs() const; + CV_WRAP int maxSamplers() const; + + CV_WRAP size_t maxWorkGroupSize() const; + CV_WRAP int maxWorkItemDims() const; + void maxWorkItemSizes(size_t*) const; + + CV_WRAP int memBaseAddrAlign() const; + + CV_WRAP int nativeVectorWidthChar() const; + CV_WRAP int nativeVectorWidthShort() const; + CV_WRAP int nativeVectorWidthInt() const; + CV_WRAP int nativeVectorWidthLong() const; + CV_WRAP int nativeVectorWidthFloat() const; + CV_WRAP int nativeVectorWidthDouble() const; + CV_WRAP int nativeVectorWidthHalf() const; + + CV_WRAP int preferredVectorWidthChar() const; + CV_WRAP int preferredVectorWidthShort() const; + CV_WRAP int preferredVectorWidthInt() const; + CV_WRAP int preferredVectorWidthLong() const; + CV_WRAP int preferredVectorWidthFloat() const; + CV_WRAP int preferredVectorWidthDouble() const; + CV_WRAP int preferredVectorWidthHalf() const; + + CV_WRAP size_t printfBufferSize() const; + CV_WRAP size_t profilingTimerResolution() const; + + CV_WRAP static const Device& getDefault(); + + /** + * @param d OpenCL handle (cl_device_id). clRetainDevice() is called on success. + */ + static Device fromHandle(void* d); + + struct Impl; + inline Impl* getImpl() const { return (Impl*)p; } + inline bool empty() const { return !p; } +protected: + Impl* p; +}; + + +class CV_EXPORTS Context +{ +public: + Context(); + explicit Context(int dtype); //!< @deprecated + ~Context(); + Context(const Context& c); + Context& operator= (const Context& c); + + /** @deprecated */ + bool create(); + /** @deprecated */ + bool create(int dtype); + + size_t ndevices() const; + Device& device(size_t idx) const; + Program getProg(const ProgramSource& prog, + const String& buildopt, String& errmsg); + void unloadProg(Program& prog); + + + /** Get thread-local OpenCL context (initialize if necessary) */ +#if 0 // OpenCV 5.0 + static Context& getDefault(); +#else + static Context& getDefault(bool initialize = true); +#endif + + /** @returns cl_context value */ + void* ptr() const; + + + bool useSVM() const; + void setUseSVM(bool enabled); + + /** + * @param context OpenCL handle (cl_context). clRetainContext() is called on success + */ + static Context fromHandle(void* context); + static Context fromDevice(const ocl::Device& device); + static Context create(const std::string& configuration); + + void release(); + + struct Impl; + inline Impl* getImpl() const { return (Impl*)p; } + inline bool empty() const { return !p; } +// TODO OpenCV 5.0 +//protected: + Impl* p; +}; + +/** @deprecated */ +class CV_EXPORTS Platform +{ +public: + Platform(); + ~Platform(); + Platform(const Platform& p); + Platform& operator = (const Platform& p); + + void* ptr() const; + + /** @deprecated */ + static Platform& getDefault(); + + struct Impl; + inline Impl* getImpl() const { return (Impl*)p; } + inline bool empty() const { return !p; } +protected: + Impl* p; +}; + +/** @brief Attaches OpenCL context to OpenCV +@note + OpenCV will check if available OpenCL platform has platformName name, then assign context to + OpenCV and call `clRetainContext` function. The deviceID device will be used as target device and + new command queue will be created. +@param platformName name of OpenCL platform to attach, this string is used to check if platform is available to OpenCV at runtime +@param platformID ID of platform attached context was created for +@param context OpenCL context to be attached to OpenCV +@param deviceID ID of device, must be created from attached context +*/ +CV_EXPORTS void attachContext(const String& platformName, void* platformID, void* context, void* deviceID); + +/** @brief Convert OpenCL buffer to UMat +@note + OpenCL buffer (cl_mem_buffer) should contain 2D image data, compatible with OpenCV. Memory + content is not copied from `clBuffer` to UMat. Instead, buffer handle assigned to UMat and + `clRetainMemObject` is called. +@param cl_mem_buffer source clBuffer handle +@param step num of bytes in single row +@param rows number of rows +@param cols number of cols +@param type OpenCV type of image +@param dst destination UMat +*/ +CV_EXPORTS void convertFromBuffer(void* cl_mem_buffer, size_t step, int rows, int cols, int type, UMat& dst); + +/** @brief Convert OpenCL image2d_t to UMat +@note + OpenCL `image2d_t` (cl_mem_image), should be compatible with OpenCV UMat formats. Memory content + is copied from image to UMat with `clEnqueueCopyImageToBuffer` function. +@param cl_mem_image source image2d_t handle +@param dst destination UMat +*/ +CV_EXPORTS void convertFromImage(void* cl_mem_image, UMat& dst); + +// TODO Move to internal header +/// @deprecated +void initializeContextFromHandle(Context& ctx, void* platform, void* context, void* device); + +class CV_EXPORTS Queue +{ +public: + Queue(); + explicit Queue(const Context& c, const Device& d=Device()); + ~Queue(); + Queue(const Queue& q); + Queue& operator = (const Queue& q); + + bool create(const Context& c=Context(), const Device& d=Device()); + void finish(); + void* ptr() const; + static Queue& getDefault(); + + /// @brief Returns OpenCL command queue with enable profiling mode support + const Queue& getProfilingQueue() const; + + struct Impl; friend struct Impl; + inline Impl* getImpl() const { return p; } + inline bool empty() const { return !p; } +protected: + Impl* p; +}; + + +class CV_EXPORTS KernelArg +{ +public: + enum { LOCAL=1, READ_ONLY=2, WRITE_ONLY=4, READ_WRITE=6, CONSTANT=8, PTR_ONLY = 16, NO_SIZE=256 }; + KernelArg(int _flags, UMat* _m, int wscale=1, int iwscale=1, const void* _obj=0, size_t _sz=0); + KernelArg(); + + static KernelArg Local(size_t localMemSize) + { return KernelArg(LOCAL, 0, 1, 1, 0, localMemSize); } + static KernelArg PtrWriteOnly(const UMat& m) + { return KernelArg(PTR_ONLY+WRITE_ONLY, (UMat*)&m); } + static KernelArg PtrReadOnly(const UMat& m) + { return KernelArg(PTR_ONLY+READ_ONLY, (UMat*)&m); } + static KernelArg PtrReadWrite(const UMat& m) + { return KernelArg(PTR_ONLY+READ_WRITE, (UMat*)&m); } + static KernelArg ReadWrite(const UMat& m, int wscale=1, int iwscale=1) + { return KernelArg(READ_WRITE, (UMat*)&m, wscale, iwscale); } + static KernelArg ReadWriteNoSize(const UMat& m, int wscale=1, int iwscale=1) + { return KernelArg(READ_WRITE+NO_SIZE, (UMat*)&m, wscale, iwscale); } + static KernelArg ReadOnly(const UMat& m, int wscale=1, int iwscale=1) + { return KernelArg(READ_ONLY, (UMat*)&m, wscale, iwscale); } + static KernelArg WriteOnly(const UMat& m, int wscale=1, int iwscale=1) + { return KernelArg(WRITE_ONLY, (UMat*)&m, wscale, iwscale); } + static KernelArg ReadOnlyNoSize(const UMat& m, int wscale=1, int iwscale=1) + { return KernelArg(READ_ONLY+NO_SIZE, (UMat*)&m, wscale, iwscale); } + static KernelArg WriteOnlyNoSize(const UMat& m, int wscale=1, int iwscale=1) + { return KernelArg(WRITE_ONLY+NO_SIZE, (UMat*)&m, wscale, iwscale); } + static KernelArg Constant(const Mat& m); + template static KernelArg Constant(const _Tp* arr, size_t n) + { return KernelArg(CONSTANT, 0, 1, 1, (void*)arr, n); } + + int flags; + UMat* m; + const void* obj; + size_t sz; + int wscale, iwscale; +}; + + +class CV_EXPORTS Kernel +{ +public: + Kernel(); + Kernel(const char* kname, const Program& prog); + Kernel(const char* kname, const ProgramSource& prog, + const String& buildopts = String(), String* errmsg=0); + ~Kernel(); + Kernel(const Kernel& k); + Kernel& operator = (const Kernel& k); + + bool empty() const; + bool create(const char* kname, const Program& prog); + bool create(const char* kname, const ProgramSource& prog, + const String& buildopts, String* errmsg=0); + + int set(int i, const void* value, size_t sz); + int set(int i, const Image2D& image2D); + int set(int i, const UMat& m); + int set(int i, const KernelArg& arg); + template int set(int i, const _Tp& value) + { return set(i, &value, sizeof(value)); } + + +protected: + template inline + int set_args_(int i, const _Tp0& a0) { return set(i, a0); } + template inline + int set_args_(int i, const _Tp0& a0, const _Tps&... rest_args) { i = set(i, a0); return set_args_(i, rest_args...); } +public: + /** @brief Setup OpenCL Kernel arguments. + Avoid direct using of set(i, ...) methods. + @code + bool ok = kernel + .args( + srcUMat, dstUMat, + (float)some_float_param + ).run(ndims, globalSize, localSize); + if (!ok) return false; + @endcode + */ + template inline + Kernel& args(const _Tps&... kernel_args) { set_args_(0, kernel_args...); return *this; } + + + /** @brief Run the OpenCL kernel. + @param dims the work problem dimensions. It is the length of globalsize and localsize. It can be either 1, 2 or 3. + @param globalsize work items for each dimension. It is not the final globalsize passed to + OpenCL. Each dimension will be adjusted to the nearest integer divisible by the corresponding + value in localsize. If localsize is NULL, it will still be adjusted depending on dims. The + adjusted values are greater than or equal to the original values. + @param localsize work-group size for each dimension. + @param sync specify whether to wait for OpenCL computation to finish before return. + @param q command queue + */ + bool run(int dims, size_t globalsize[], + size_t localsize[], bool sync, const Queue& q=Queue()); + bool runTask(bool sync, const Queue& q=Queue()); + + /** @brief Similar to synchronized run() call with returning of kernel execution time + * Separate OpenCL command queue may be used (with CL_QUEUE_PROFILING_ENABLE) + * @return Execution time in nanoseconds or negative number on error + */ + int64 runProfiling(int dims, size_t globalsize[], size_t localsize[], const Queue& q=Queue()); + + size_t workGroupSize() const; + size_t preferedWorkGroupSizeMultiple() const; + bool compileWorkGroupSize(size_t wsz[]) const; + size_t localMemSize() const; + + void* ptr() const; + struct Impl; + +protected: + Impl* p; +}; + +class CV_EXPORTS Program +{ +public: + Program(); + Program(const ProgramSource& src, + const String& buildflags, String& errmsg); + Program(const Program& prog); + + Program& operator = (const Program& prog); + ~Program(); + + bool create(const ProgramSource& src, + const String& buildflags, String& errmsg); + + void* ptr() const; + + /** + * @brief Query device-specific program binary. + * + * Returns RAW OpenCL executable binary without additional attachments. + * + * @sa ProgramSource::fromBinary + * + * @param[out] binary output buffer + */ + void getBinary(std::vector& binary) const; + + struct Impl; friend struct Impl; + inline Impl* getImpl() const { return (Impl*)p; } + inline bool empty() const { return !p; } +protected: + Impl* p; +public: +#ifndef OPENCV_REMOVE_DEPRECATED_API + // TODO Remove this + CV_DEPRECATED bool read(const String& buf, const String& buildflags); // removed, use ProgramSource instead + CV_DEPRECATED bool write(String& buf) const; // removed, use getBinary() method instead (RAW OpenCL binary) + CV_DEPRECATED const ProgramSource& source() const; // implementation removed + CV_DEPRECATED String getPrefix() const; // deprecated, implementation replaced + CV_DEPRECATED static String getPrefix(const String& buildflags); // deprecated, implementation replaced +#endif +}; + + +class CV_EXPORTS ProgramSource +{ +public: + typedef uint64 hash_t; // deprecated + + ProgramSource(); + explicit ProgramSource(const String& module, const String& name, const String& codeStr, const String& codeHash); + explicit ProgramSource(const String& prog); // deprecated + explicit ProgramSource(const char* prog); // deprecated + ~ProgramSource(); + ProgramSource(const ProgramSource& prog); + ProgramSource& operator = (const ProgramSource& prog); + + const String& source() const; // deprecated + hash_t hash() const; // deprecated + + + /** @brief Describe OpenCL program binary. + * Do not call clCreateProgramWithBinary() and/or clBuildProgram(). + * + * Caller should guarantee binary buffer lifetime greater than ProgramSource object (and any of its copies). + * + * This kind of binary is not portable between platforms in general - it is specific to OpenCL vendor / device / driver version. + * + * @param module name of program owner module + * @param name unique name of program (module+name is used as key for OpenCL program caching) + * @param binary buffer address. See buffer lifetime requirement in description. + * @param size buffer size + * @param buildOptions additional program-related build options passed to clBuildProgram() + * @return created ProgramSource object + */ + static ProgramSource fromBinary(const String& module, const String& name, + const unsigned char* binary, const size_t size, + const cv::String& buildOptions = cv::String()); + + /** @brief Describe OpenCL program in SPIR format. + * Do not call clCreateProgramWithBinary() and/or clBuildProgram(). + * + * Supports SPIR 1.2 by default (pass '-spir-std=X.Y' in buildOptions to override this behavior) + * + * Caller should guarantee binary buffer lifetime greater than ProgramSource object (and any of its copies). + * + * Programs in this format are portable between OpenCL implementations with 'khr_spir' extension: + * https://www.khronos.org/registry/OpenCL/sdk/2.0/docs/man/xhtml/cl_khr_spir.html + * (but they are not portable between different platforms: 32-bit / 64-bit) + * + * Note: these programs can't support vendor specific extensions, like 'cl_intel_subgroups'. + * + * @param module name of program owner module + * @param name unique name of program (module+name is used as key for OpenCL program caching) + * @param binary buffer address. See buffer lifetime requirement in description. + * @param size buffer size + * @param buildOptions additional program-related build options passed to clBuildProgram() + * (these options are added automatically: '-x spir' and '-spir-std=1.2') + * @return created ProgramSource object. + */ + static ProgramSource fromSPIR(const String& module, const String& name, + const unsigned char* binary, const size_t size, + const cv::String& buildOptions = cv::String()); + + //OpenCL 2.1+ only + //static Program fromSPIRV(const String& module, const String& name, + // const unsigned char* binary, const size_t size, + // const cv::String& buildOptions = cv::String()); + + struct Impl; friend struct Impl; + inline Impl* getImpl() const { return (Impl*)p; } + inline bool empty() const { return !p; } +protected: + Impl* p; +}; + +class CV_EXPORTS PlatformInfo +{ +public: + PlatformInfo(); + /** + * @param id pointer cl_platform_id (cl_platform_id*) + */ + explicit PlatformInfo(void* id); + ~PlatformInfo(); + + PlatformInfo(const PlatformInfo& i); + PlatformInfo& operator =(const PlatformInfo& i); + + String name() const; + String vendor() const; + + /// See CL_PLATFORM_VERSION + String version() const; + int versionMajor() const; + int versionMinor() const; + + int deviceNumber() const; + void getDevice(Device& device, int d) const; + + struct Impl; + bool empty() const { return !p; } +protected: + Impl* p; +}; + +CV_EXPORTS const char* convertTypeStr(int sdepth, int ddepth, int cn, char* buf); +CV_EXPORTS const char* typeToStr(int t); +CV_EXPORTS const char* memopTypeToStr(int t); +CV_EXPORTS const char* vecopTypeToStr(int t); +CV_EXPORTS const char* getOpenCLErrorString(int errorCode); +CV_EXPORTS String kernelToStr(InputArray _kernel, int ddepth = -1, const char * name = NULL); +CV_EXPORTS void getPlatfomsInfo(std::vector& platform_info); + + +enum OclVectorStrategy +{ + // all matrices have its own vector width + OCL_VECTOR_OWN = 0, + // all matrices have maximal vector width among all matrices + // (useful for cases when matrices have different data types) + OCL_VECTOR_MAX = 1, + + // default strategy + OCL_VECTOR_DEFAULT = OCL_VECTOR_OWN +}; + +CV_EXPORTS int predictOptimalVectorWidth(InputArray src1, InputArray src2 = noArray(), InputArray src3 = noArray(), + InputArray src4 = noArray(), InputArray src5 = noArray(), InputArray src6 = noArray(), + InputArray src7 = noArray(), InputArray src8 = noArray(), InputArray src9 = noArray(), + OclVectorStrategy strat = OCL_VECTOR_DEFAULT); + +CV_EXPORTS int checkOptimalVectorWidth(const int *vectorWidths, + InputArray src1, InputArray src2 = noArray(), InputArray src3 = noArray(), + InputArray src4 = noArray(), InputArray src5 = noArray(), InputArray src6 = noArray(), + InputArray src7 = noArray(), InputArray src8 = noArray(), InputArray src9 = noArray(), + OclVectorStrategy strat = OCL_VECTOR_DEFAULT); + +// with OCL_VECTOR_MAX strategy +CV_EXPORTS int predictOptimalVectorWidthMax(InputArray src1, InputArray src2 = noArray(), InputArray src3 = noArray(), + InputArray src4 = noArray(), InputArray src5 = noArray(), InputArray src6 = noArray(), + InputArray src7 = noArray(), InputArray src8 = noArray(), InputArray src9 = noArray()); + +CV_EXPORTS void buildOptionsAddMatrixDescription(String& buildOptions, const String& name, InputArray _m); + +class CV_EXPORTS Image2D +{ +public: + Image2D(); + + /** + @param src UMat object from which to get image properties and data + @param norm flag to enable the use of normalized channel data types + @param alias flag indicating that the image should alias the src UMat. If true, changes to the + image or src will be reflected in both objects. + */ + explicit Image2D(const UMat &src, bool norm = false, bool alias = false); + Image2D(const Image2D & i); + ~Image2D(); + + Image2D & operator = (const Image2D & i); + + /** Indicates if creating an aliased image should succeed. + Depends on the underlying platform and the dimensions of the UMat. + */ + static bool canCreateAlias(const UMat &u); + + /** Indicates if the image format is supported. + */ + static bool isFormatSupported(int depth, int cn, bool norm); + + void* ptr() const; +protected: + struct Impl; + Impl* p; +}; + +class CV_EXPORTS Timer +{ +public: + Timer(const Queue& q); + ~Timer(); + void start(); + void stop(); + + uint64 durationNS() const; //< duration in nanoseconds + +protected: + struct Impl; + Impl* const p; + +private: + Timer(const Timer&); // disabled + Timer& operator=(const Timer&); // disabled +}; + +CV_EXPORTS MatAllocator* getOpenCLAllocator(); + + +class CV_EXPORTS_W OpenCLExecutionContext +{ +public: + OpenCLExecutionContext() = default; + ~OpenCLExecutionContext() = default; + + OpenCLExecutionContext(const OpenCLExecutionContext&) = default; + OpenCLExecutionContext(OpenCLExecutionContext&&) = default; + + OpenCLExecutionContext& operator=(const OpenCLExecutionContext&) = default; + OpenCLExecutionContext& operator=(OpenCLExecutionContext&&) = default; + + /** Get associated ocl::Context */ + Context& getContext() const; + /** Get the single default associated ocl::Device */ + Device& getDevice() const; + /** Get the single ocl::Queue that is associated with the ocl::Context and + * the single default ocl::Device + */ + Queue& getQueue() const; + + bool useOpenCL() const; + void setUseOpenCL(bool flag); + + /** Get OpenCL execution context of current thread. + * + * Initialize OpenCL execution context if it is empty + * - create new + * - reuse context of the main thread (threadID = 0) + */ + static OpenCLExecutionContext& getCurrent(); + + /** Get OpenCL execution context of current thread (can be empty) */ + static OpenCLExecutionContext& getCurrentRef(); + + /** Bind this OpenCL execution context to current thread. + * + * Context can't be empty. + * + * @note clFinish is not called for queue of previous execution context + */ + void bind() const; + + /** Creates new execution context with same OpenCV context and device + * + * @param q OpenCL queue + */ + OpenCLExecutionContext cloneWithNewQueue(const ocl::Queue& q) const; + /** @overload */ + OpenCLExecutionContext cloneWithNewQueue() const; + + /** @brief Creates OpenCL execution context + * OpenCV will check if available OpenCL platform has platformName name, then assign context to + * OpenCV and call `clRetainContext` function. The deviceID device will be used as target device and + * new command queue will be created. + * + * @note Lifetime of passed handles is transferred to OpenCV wrappers on success + * + * @param platformName name of OpenCL platform to attach, this string is used to check if platform is available to OpenCV at runtime + * @param platformID ID of platform attached context was created for (cl_platform_id) + * @param context OpenCL context to be attached to OpenCV (cl_context) + * @param deviceID OpenCL device (cl_device_id) + */ + static OpenCLExecutionContext create(const std::string& platformName, void* platformID, void* context, void* deviceID); + + /** @brief Creates OpenCL execution context + * + * @param context non-empty OpenCL context + * @param device non-empty OpenCL device (must be a part of context) + * @param queue non-empty OpenCL queue for provided context and device + */ + static OpenCLExecutionContext create(const Context& context, const Device& device, const ocl::Queue& queue); + /** @overload */ + static OpenCLExecutionContext create(const Context& context, const Device& device); + + struct Impl; + inline bool empty() const { return !p; } + void release(); +protected: + std::shared_ptr p; +}; + +class OpenCLExecutionContextScope +{ + OpenCLExecutionContext ctx_; +public: + inline OpenCLExecutionContextScope(const OpenCLExecutionContext& ctx) + { + CV_Assert(!ctx.empty()); + ctx_ = OpenCLExecutionContext::getCurrentRef(); + ctx.bind(); + } + + inline ~OpenCLExecutionContextScope() + { + if (!ctx_.empty()) + { + ctx_.bind(); + } + } +}; + +#ifdef __OPENCV_BUILD +namespace internal { + +CV_EXPORTS bool isOpenCLForced(); +#define OCL_FORCE_CHECK(condition) (cv::ocl::internal::isOpenCLForced() || (condition)) + +CV_EXPORTS bool isPerformanceCheckBypassed(); +#define OCL_PERFORMANCE_CHECK(condition) (cv::ocl::internal::isPerformanceCheckBypassed() || (condition)) + +CV_EXPORTS bool isCLBuffer(UMat& u); + +} // namespace internal +#endif + +//! @} + +}} + +#endif diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/ocl_genbase.hpp b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/ocl_genbase.hpp new file mode 100644 index 0000000..5334cf1 --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/ocl_genbase.hpp @@ -0,0 +1,69 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2013, OpenCV Foundation, all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the OpenCV Foundation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef OPENCV_OPENCL_GENBASE_HPP +#define OPENCV_OPENCL_GENBASE_HPP + +//! @cond IGNORED + +namespace cv { +namespace ocl { + +class ProgramSource; + +namespace internal { + +struct CV_EXPORTS ProgramEntry +{ + const char* module; + const char* name; + const char* programCode; + const char* programHash; + ProgramSource* pProgramSource; + + operator ProgramSource& () const; +}; + +} } } // namespace + +//! @endcond + +#endif diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/opencl/ocl_defs.hpp b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/opencl/ocl_defs.hpp new file mode 100644 index 0000000..14df750 --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/opencl/ocl_defs.hpp @@ -0,0 +1,82 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +// Copyright (C) 2014, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. + +#ifndef OPENCV_CORE_OPENCL_DEFS_HPP +#define OPENCV_CORE_OPENCL_DEFS_HPP + +#include "opencv2/core/utility.hpp" +#include "cvconfig.h" + +namespace cv { namespace ocl { +#ifdef HAVE_OPENCL +/// Call is similar to useOpenCL() but doesn't try to load OpenCL runtime or create OpenCL context +CV_EXPORTS bool isOpenCLActivated(); +#else +static inline bool isOpenCLActivated() { return false; } +#endif +}} // namespace + + +//#define CV_OPENCL_RUN_ASSERT + +#ifdef HAVE_OPENCL + +#ifdef CV_OPENCL_RUN_VERBOSE +#define CV_OCL_RUN_(condition, func, ...) \ + { \ + if (cv::ocl::isOpenCLActivated() && (condition) && func) \ + { \ + printf("%s: OpenCL implementation is running\n", CV_Func); \ + fflush(stdout); \ + CV_IMPL_ADD(CV_IMPL_OCL); \ + return __VA_ARGS__; \ + } \ + else \ + { \ + printf("%s: Plain implementation is running\n", CV_Func); \ + fflush(stdout); \ + } \ + } +#elif defined CV_OPENCL_RUN_ASSERT +#define CV_OCL_RUN_(condition, func, ...) \ + { \ + if (cv::ocl::isOpenCLActivated() && (condition)) \ + { \ + if(func) \ + { \ + CV_IMPL_ADD(CV_IMPL_OCL); \ + } \ + else \ + { \ + CV_Error(cv::Error::StsAssert, #func); \ + } \ + return __VA_ARGS__; \ + } \ + } +#else +#define CV_OCL_RUN_(condition, func, ...) \ +try \ +{ \ + if (cv::ocl::isOpenCLActivated() && (condition) && func) \ + { \ + CV_IMPL_ADD(CV_IMPL_OCL); \ + return __VA_ARGS__; \ + } \ +} \ +catch (const cv::Exception& e) \ +{ \ + CV_UNUSED(e); /* TODO: Add some logging here */ \ +} +#endif + +#else +#define CV_OCL_RUN_(condition, func, ...) +#endif + +#define CV_OCL_RUN(condition, func) CV_OCL_RUN_(condition, func) + +#endif // OPENCV_CORE_OPENCL_DEFS_HPP diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/opencl/opencl_info.hpp b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/opencl/opencl_info.hpp new file mode 100644 index 0000000..5e5c846 --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/opencl/opencl_info.hpp @@ -0,0 +1,205 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#include + +#include +#include + +#ifndef DUMP_CONFIG_PROPERTY +#define DUMP_CONFIG_PROPERTY(...) +#endif + +#ifndef DUMP_MESSAGE_STDOUT +#define DUMP_MESSAGE_STDOUT(...) do { std::cout << __VA_ARGS__ << std::endl; } while (false) +#endif + +namespace cv { + +namespace { +static std::string bytesToStringRepr(size_t value) +{ + size_t b = value % 1024; + value /= 1024; + + size_t kb = value % 1024; + value /= 1024; + + size_t mb = value % 1024; + value /= 1024; + + size_t gb = value; + + std::ostringstream stream; + + if (gb > 0) + stream << gb << " GB "; + if (mb > 0) + stream << mb << " MB "; + if (kb > 0) + stream << kb << " KB "; + if (b > 0) + stream << b << " B"; + + std::string s = stream.str(); + if (s[s.size() - 1] == ' ') + s = s.substr(0, s.size() - 1); + return s; +} + +static String getDeviceTypeString(const cv::ocl::Device& device) +{ + if (device.type() == cv::ocl::Device::TYPE_CPU) { + return "CPU"; + } + + if (device.type() == cv::ocl::Device::TYPE_GPU) { + if (device.hostUnifiedMemory()) { + return "iGPU"; + } else { + return "dGPU"; + } + } + + return "unknown"; +} +} // namespace + +static void dumpOpenCLInformation() +{ + using namespace cv::ocl; + + try + { + if (!haveOpenCL() || !useOpenCL()) + { + DUMP_MESSAGE_STDOUT("OpenCL is disabled"); + DUMP_CONFIG_PROPERTY("cv_ocl", "disabled"); + return; + } + + std::vector platforms; + cv::ocl::getPlatfomsInfo(platforms); + if (platforms.empty()) + { + DUMP_MESSAGE_STDOUT("OpenCL is not available"); + DUMP_CONFIG_PROPERTY("cv_ocl", "not available"); + return; + } + + DUMP_MESSAGE_STDOUT("OpenCL Platforms: "); + for (size_t i = 0; i < platforms.size(); i++) + { + const PlatformInfo* platform = &platforms[i]; + DUMP_MESSAGE_STDOUT(" " << platform->name()); + Device current_device; + for (int j = 0; j < platform->deviceNumber(); j++) + { + platform->getDevice(current_device, j); + String deviceTypeStr = getDeviceTypeString(current_device); + DUMP_MESSAGE_STDOUT( " " << deviceTypeStr << ": " << current_device.name() << " (" << current_device.version() << ")"); + DUMP_CONFIG_PROPERTY( cv::format("cv_ocl_platform_%d_device_%d", (int)i, j ), + cv::format("(Platform=%s)(Type=%s)(Name=%s)(Version=%s)", + platform->name().c_str(), deviceTypeStr.c_str(), current_device.name().c_str(), current_device.version().c_str()) ); + } + } + const Device& device = Device::getDefault(); + if (!device.available()) + CV_Error(Error::OpenCLInitError, "OpenCL device is not available"); + + DUMP_MESSAGE_STDOUT("Current OpenCL device: "); + + String deviceTypeStr = getDeviceTypeString(device); + DUMP_MESSAGE_STDOUT(" Type = " << deviceTypeStr); + DUMP_CONFIG_PROPERTY("cv_ocl_current_deviceType", deviceTypeStr); + + DUMP_MESSAGE_STDOUT(" Name = " << device.name()); + DUMP_CONFIG_PROPERTY("cv_ocl_current_deviceName", device.name()); + + DUMP_MESSAGE_STDOUT(" Version = " << device.version()); + DUMP_CONFIG_PROPERTY("cv_ocl_current_deviceVersion", device.version()); + + DUMP_MESSAGE_STDOUT(" Driver version = " << device.driverVersion()); + DUMP_CONFIG_PROPERTY("cv_ocl_current_driverVersion", device.driverVersion()); + + DUMP_MESSAGE_STDOUT(" Address bits = " << device.addressBits()); + DUMP_CONFIG_PROPERTY("cv_ocl_current_addressBits", device.addressBits()); + + DUMP_MESSAGE_STDOUT(" Compute units = " << device.maxComputeUnits()); + DUMP_CONFIG_PROPERTY("cv_ocl_current_maxComputeUnits", device.maxComputeUnits()); + + DUMP_MESSAGE_STDOUT(" Max work group size = " << device.maxWorkGroupSize()); + DUMP_CONFIG_PROPERTY("cv_ocl_current_maxWorkGroupSize", device.maxWorkGroupSize()); + + std::string localMemorySizeStr = bytesToStringRepr(device.localMemSize()); + DUMP_MESSAGE_STDOUT(" Local memory size = " << localMemorySizeStr); + DUMP_CONFIG_PROPERTY("cv_ocl_current_localMemSize", device.localMemSize()); + + std::string maxMemAllocSizeStr = bytesToStringRepr(device.maxMemAllocSize()); + DUMP_MESSAGE_STDOUT(" Max memory allocation size = " << maxMemAllocSizeStr); + DUMP_CONFIG_PROPERTY("cv_ocl_current_maxMemAllocSize", device.maxMemAllocSize()); + + const char* doubleSupportStr = device.doubleFPConfig() > 0 ? "Yes" : "No"; + DUMP_MESSAGE_STDOUT(" Double support = " << doubleSupportStr); + DUMP_CONFIG_PROPERTY("cv_ocl_current_haveDoubleSupport", device.doubleFPConfig() > 0); + + const char* isUnifiedMemoryStr = device.hostUnifiedMemory() ? "Yes" : "No"; + DUMP_MESSAGE_STDOUT(" Host unified memory = " << isUnifiedMemoryStr); + DUMP_CONFIG_PROPERTY("cv_ocl_current_hostUnifiedMemory", device.hostUnifiedMemory()); + + DUMP_MESSAGE_STDOUT(" Device extensions:"); + String extensionsStr = device.extensions(); + size_t pos = 0; + while (pos < extensionsStr.size()) + { + size_t pos2 = extensionsStr.find(' ', pos); + if (pos2 == String::npos) + pos2 = extensionsStr.size(); + if (pos2 > pos) + { + String extensionName = extensionsStr.substr(pos, pos2 - pos); + DUMP_MESSAGE_STDOUT(" " << extensionName); + } + pos = pos2 + 1; + } + DUMP_CONFIG_PROPERTY("cv_ocl_current_extensions", extensionsStr); + + const char* haveAmdBlasStr = haveAmdBlas() ? "Yes" : "No"; + DUMP_MESSAGE_STDOUT(" Has AMD Blas = " << haveAmdBlasStr); + DUMP_CONFIG_PROPERTY("cv_ocl_current_AmdBlas", haveAmdBlas()); + + const char* haveAmdFftStr = haveAmdFft() ? "Yes" : "No"; + DUMP_MESSAGE_STDOUT(" Has AMD Fft = " << haveAmdFftStr); + DUMP_CONFIG_PROPERTY("cv_ocl_current_AmdFft", haveAmdFft()); + + + DUMP_MESSAGE_STDOUT(" Preferred vector width char = " << device.preferredVectorWidthChar()); + DUMP_CONFIG_PROPERTY("cv_ocl_current_preferredVectorWidthChar", device.preferredVectorWidthChar()); + + DUMP_MESSAGE_STDOUT(" Preferred vector width short = " << device.preferredVectorWidthShort()); + DUMP_CONFIG_PROPERTY("cv_ocl_current_preferredVectorWidthShort", device.preferredVectorWidthShort()); + + DUMP_MESSAGE_STDOUT(" Preferred vector width int = " << device.preferredVectorWidthInt()); + DUMP_CONFIG_PROPERTY("cv_ocl_current_preferredVectorWidthInt", device.preferredVectorWidthInt()); + + DUMP_MESSAGE_STDOUT(" Preferred vector width long = " << device.preferredVectorWidthLong()); + DUMP_CONFIG_PROPERTY("cv_ocl_current_preferredVectorWidthLong", device.preferredVectorWidthLong()); + + DUMP_MESSAGE_STDOUT(" Preferred vector width float = " << device.preferredVectorWidthFloat()); + DUMP_CONFIG_PROPERTY("cv_ocl_current_preferredVectorWidthFloat", device.preferredVectorWidthFloat()); + + DUMP_MESSAGE_STDOUT(" Preferred vector width double = " << device.preferredVectorWidthDouble()); + DUMP_CONFIG_PROPERTY("cv_ocl_current_preferredVectorWidthDouble", device.preferredVectorWidthDouble()); + } + catch (...) + { + DUMP_MESSAGE_STDOUT("Exception. Can't dump OpenCL info"); + DUMP_MESSAGE_STDOUT("OpenCL device not available"); + DUMP_CONFIG_PROPERTY("cv_ocl", "not available"); + } +} +#undef DUMP_MESSAGE_STDOUT +#undef DUMP_CONFIG_PROPERTY + +} // namespace diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/opencl/opencl_svm.hpp b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/opencl/opencl_svm.hpp new file mode 100644 index 0000000..7453082 --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/opencl/opencl_svm.hpp @@ -0,0 +1,81 @@ +/* See LICENSE file in the root OpenCV directory */ + +#ifndef OPENCV_CORE_OPENCL_SVM_HPP +#define OPENCV_CORE_OPENCL_SVM_HPP + +// +// Internal usage only (binary compatibility is not guaranteed) +// +#ifndef __OPENCV_BUILD +#error Internal header file +#endif + +#if defined(HAVE_OPENCL) && defined(HAVE_OPENCL_SVM) +#include "runtime/opencl_core.hpp" +#include "runtime/opencl_svm_20.hpp" +#include "runtime/opencl_svm_hsa_extension.hpp" + +namespace cv { namespace ocl { namespace svm { + +struct SVMCapabilities +{ + enum Value + { + SVM_COARSE_GRAIN_BUFFER = (1 << 0), + SVM_FINE_GRAIN_BUFFER = (1 << 1), + SVM_FINE_GRAIN_SYSTEM = (1 << 2), + SVM_ATOMICS = (1 << 3), + }; + int value_; + + SVMCapabilities(int capabilities = 0) : value_(capabilities) { } + operator int() const { return value_; } + + inline bool isNoSVMSupport() const { return value_ == 0; } + inline bool isSupportCoarseGrainBuffer() const { return (value_ & SVM_COARSE_GRAIN_BUFFER) != 0; } + inline bool isSupportFineGrainBuffer() const { return (value_ & SVM_FINE_GRAIN_BUFFER) != 0; } + inline bool isSupportFineGrainSystem() const { return (value_ & SVM_FINE_GRAIN_SYSTEM) != 0; } + inline bool isSupportAtomics() const { return (value_ & SVM_ATOMICS) != 0; } +}; + +CV_EXPORTS const SVMCapabilities getSVMCapabilitites(const ocl::Context& context); + +struct SVMFunctions +{ + clSVMAllocAMD_fn fn_clSVMAlloc; + clSVMFreeAMD_fn fn_clSVMFree; + clSetKernelArgSVMPointerAMD_fn fn_clSetKernelArgSVMPointer; + //clSetKernelExecInfoAMD_fn fn_clSetKernelExecInfo; + //clEnqueueSVMFreeAMD_fn fn_clEnqueueSVMFree; + clEnqueueSVMMemcpyAMD_fn fn_clEnqueueSVMMemcpy; + clEnqueueSVMMemFillAMD_fn fn_clEnqueueSVMMemFill; + clEnqueueSVMMapAMD_fn fn_clEnqueueSVMMap; + clEnqueueSVMUnmapAMD_fn fn_clEnqueueSVMUnmap; + + inline SVMFunctions() + : fn_clSVMAlloc(NULL), fn_clSVMFree(NULL), + fn_clSetKernelArgSVMPointer(NULL), /*fn_clSetKernelExecInfo(NULL),*/ + /*fn_clEnqueueSVMFree(NULL),*/ fn_clEnqueueSVMMemcpy(NULL), fn_clEnqueueSVMMemFill(NULL), + fn_clEnqueueSVMMap(NULL), fn_clEnqueueSVMUnmap(NULL) + { + // nothing + } + + inline bool isValid() const + { + return fn_clSVMAlloc != NULL && fn_clSVMFree && fn_clSetKernelArgSVMPointer && + /*fn_clSetKernelExecInfo && fn_clEnqueueSVMFree &&*/ fn_clEnqueueSVMMemcpy && + fn_clEnqueueSVMMemFill && fn_clEnqueueSVMMap && fn_clEnqueueSVMUnmap; + } +}; + +// We should guarantee that SVMFunctions lifetime is not less than context's lifetime +CV_EXPORTS const SVMFunctions* getSVMFunctions(const ocl::Context& context); + +CV_EXPORTS bool useSVM(UMatUsageFlags usageFlags); + +}}} //namespace cv::ocl::svm +#endif + +#endif // OPENCV_CORE_OPENCL_SVM_HPP +/* End of file. */ diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/opencl/runtime/autogenerated/opencl_clamdblas.hpp b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/opencl/runtime/autogenerated/opencl_clamdblas.hpp new file mode 100644 index 0000000..65c8493 --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/opencl/runtime/autogenerated/opencl_clamdblas.hpp @@ -0,0 +1,714 @@ +// +// AUTOGENERATED, DO NOT EDIT +// +#ifndef OPENCV_CORE_OCL_RUNTIME_CLAMDBLAS_HPP +#error "Invalid usage" +#endif + +// generated by parser_clamdblas.py +#define clAmdBlasAddScratchImage clAmdBlasAddScratchImage_ +#define clAmdBlasCaxpy clAmdBlasCaxpy_ +#define clAmdBlasCcopy clAmdBlasCcopy_ +#define clAmdBlasCdotc clAmdBlasCdotc_ +#define clAmdBlasCdotu clAmdBlasCdotu_ +#define clAmdBlasCgbmv clAmdBlasCgbmv_ +#define clAmdBlasCgemm clAmdBlasCgemm_ +#define clAmdBlasCgemmEx clAmdBlasCgemmEx_ +#define clAmdBlasCgemv clAmdBlasCgemv_ +#define clAmdBlasCgemvEx clAmdBlasCgemvEx_ +#define clAmdBlasCgerc clAmdBlasCgerc_ +#define clAmdBlasCgeru clAmdBlasCgeru_ +#define clAmdBlasChbmv clAmdBlasChbmv_ +#define clAmdBlasChemm clAmdBlasChemm_ +#define clAmdBlasChemv clAmdBlasChemv_ +#define clAmdBlasCher clAmdBlasCher_ +#define clAmdBlasCher2 clAmdBlasCher2_ +#define clAmdBlasCher2k clAmdBlasCher2k_ +#define clAmdBlasCherk clAmdBlasCherk_ +#define clAmdBlasChpmv clAmdBlasChpmv_ +#define clAmdBlasChpr clAmdBlasChpr_ +#define clAmdBlasChpr2 clAmdBlasChpr2_ +#define clAmdBlasCrotg clAmdBlasCrotg_ +#define clAmdBlasCscal clAmdBlasCscal_ +#define clAmdBlasCsrot clAmdBlasCsrot_ +#define clAmdBlasCsscal clAmdBlasCsscal_ +#define clAmdBlasCswap clAmdBlasCswap_ +#define clAmdBlasCsymm clAmdBlasCsymm_ +#define clAmdBlasCsyr2k clAmdBlasCsyr2k_ +#define clAmdBlasCsyr2kEx clAmdBlasCsyr2kEx_ +#define clAmdBlasCsyrk clAmdBlasCsyrk_ +#define clAmdBlasCsyrkEx clAmdBlasCsyrkEx_ +#define clAmdBlasCtbmv clAmdBlasCtbmv_ +#define clAmdBlasCtbsv clAmdBlasCtbsv_ +#define clAmdBlasCtpmv clAmdBlasCtpmv_ +#define clAmdBlasCtpsv clAmdBlasCtpsv_ +#define clAmdBlasCtrmm clAmdBlasCtrmm_ +#define clAmdBlasCtrmmEx clAmdBlasCtrmmEx_ +#define clAmdBlasCtrmv clAmdBlasCtrmv_ +#define clAmdBlasCtrsm clAmdBlasCtrsm_ +#define clAmdBlasCtrsmEx clAmdBlasCtrsmEx_ +#define clAmdBlasCtrsv clAmdBlasCtrsv_ +#define clAmdBlasDasum clAmdBlasDasum_ +#define clAmdBlasDaxpy clAmdBlasDaxpy_ +#define clAmdBlasDcopy clAmdBlasDcopy_ +#define clAmdBlasDdot clAmdBlasDdot_ +#define clAmdBlasDgbmv clAmdBlasDgbmv_ +#define clAmdBlasDgemm clAmdBlasDgemm_ +#define clAmdBlasDgemmEx clAmdBlasDgemmEx_ +#define clAmdBlasDgemv clAmdBlasDgemv_ +#define clAmdBlasDgemvEx clAmdBlasDgemvEx_ +#define clAmdBlasDger clAmdBlasDger_ +#define clAmdBlasDnrm2 clAmdBlasDnrm2_ +#define clAmdBlasDrot clAmdBlasDrot_ +#define clAmdBlasDrotg clAmdBlasDrotg_ +#define clAmdBlasDrotm clAmdBlasDrotm_ +#define clAmdBlasDrotmg clAmdBlasDrotmg_ +#define clAmdBlasDsbmv clAmdBlasDsbmv_ +#define clAmdBlasDscal clAmdBlasDscal_ +#define clAmdBlasDspmv clAmdBlasDspmv_ +#define clAmdBlasDspr clAmdBlasDspr_ +#define clAmdBlasDspr2 clAmdBlasDspr2_ +#define clAmdBlasDswap clAmdBlasDswap_ +#define clAmdBlasDsymm clAmdBlasDsymm_ +#define clAmdBlasDsymv clAmdBlasDsymv_ +#define clAmdBlasDsymvEx clAmdBlasDsymvEx_ +#define clAmdBlasDsyr clAmdBlasDsyr_ +#define clAmdBlasDsyr2 clAmdBlasDsyr2_ +#define clAmdBlasDsyr2k clAmdBlasDsyr2k_ +#define clAmdBlasDsyr2kEx clAmdBlasDsyr2kEx_ +#define clAmdBlasDsyrk clAmdBlasDsyrk_ +#define clAmdBlasDsyrkEx clAmdBlasDsyrkEx_ +#define clAmdBlasDtbmv clAmdBlasDtbmv_ +#define clAmdBlasDtbsv clAmdBlasDtbsv_ +#define clAmdBlasDtpmv clAmdBlasDtpmv_ +#define clAmdBlasDtpsv clAmdBlasDtpsv_ +#define clAmdBlasDtrmm clAmdBlasDtrmm_ +#define clAmdBlasDtrmmEx clAmdBlasDtrmmEx_ +#define clAmdBlasDtrmv clAmdBlasDtrmv_ +#define clAmdBlasDtrsm clAmdBlasDtrsm_ +#define clAmdBlasDtrsmEx clAmdBlasDtrsmEx_ +#define clAmdBlasDtrsv clAmdBlasDtrsv_ +#define clAmdBlasDzasum clAmdBlasDzasum_ +#define clAmdBlasDznrm2 clAmdBlasDznrm2_ +#define clAmdBlasGetVersion clAmdBlasGetVersion_ +#define clAmdBlasRemoveScratchImage clAmdBlasRemoveScratchImage_ +#define clAmdBlasSasum clAmdBlasSasum_ +#define clAmdBlasSaxpy clAmdBlasSaxpy_ +#define clAmdBlasScasum clAmdBlasScasum_ +#define clAmdBlasScnrm2 clAmdBlasScnrm2_ +#define clAmdBlasScopy clAmdBlasScopy_ +#define clAmdBlasSdot clAmdBlasSdot_ +#define clAmdBlasSetup clAmdBlasSetup_ +#define clAmdBlasSgbmv clAmdBlasSgbmv_ +#define clAmdBlasSgemm clAmdBlasSgemm_ +#define clAmdBlasSgemmEx clAmdBlasSgemmEx_ +#define clAmdBlasSgemv clAmdBlasSgemv_ +#define clAmdBlasSgemvEx clAmdBlasSgemvEx_ +#define clAmdBlasSger clAmdBlasSger_ +#define clAmdBlasSnrm2 clAmdBlasSnrm2_ +#define clAmdBlasSrot clAmdBlasSrot_ +#define clAmdBlasSrotg clAmdBlasSrotg_ +#define clAmdBlasSrotm clAmdBlasSrotm_ +#define clAmdBlasSrotmg clAmdBlasSrotmg_ +#define clAmdBlasSsbmv clAmdBlasSsbmv_ +#define clAmdBlasSscal clAmdBlasSscal_ +#define clAmdBlasSspmv clAmdBlasSspmv_ +#define clAmdBlasSspr clAmdBlasSspr_ +#define clAmdBlasSspr2 clAmdBlasSspr2_ +#define clAmdBlasSswap clAmdBlasSswap_ +#define clAmdBlasSsymm clAmdBlasSsymm_ +#define clAmdBlasSsymv clAmdBlasSsymv_ +#define clAmdBlasSsymvEx clAmdBlasSsymvEx_ +#define clAmdBlasSsyr clAmdBlasSsyr_ +#define clAmdBlasSsyr2 clAmdBlasSsyr2_ +#define clAmdBlasSsyr2k clAmdBlasSsyr2k_ +#define clAmdBlasSsyr2kEx clAmdBlasSsyr2kEx_ +#define clAmdBlasSsyrk clAmdBlasSsyrk_ +#define clAmdBlasSsyrkEx clAmdBlasSsyrkEx_ +#define clAmdBlasStbmv clAmdBlasStbmv_ +#define clAmdBlasStbsv clAmdBlasStbsv_ +#define clAmdBlasStpmv clAmdBlasStpmv_ +#define clAmdBlasStpsv clAmdBlasStpsv_ +#define clAmdBlasStrmm clAmdBlasStrmm_ +#define clAmdBlasStrmmEx clAmdBlasStrmmEx_ +#define clAmdBlasStrmv clAmdBlasStrmv_ +#define clAmdBlasStrsm clAmdBlasStrsm_ +#define clAmdBlasStrsmEx clAmdBlasStrsmEx_ +#define clAmdBlasStrsv clAmdBlasStrsv_ +#define clAmdBlasTeardown clAmdBlasTeardown_ +#define clAmdBlasZaxpy clAmdBlasZaxpy_ +#define clAmdBlasZcopy clAmdBlasZcopy_ +#define clAmdBlasZdotc clAmdBlasZdotc_ +#define clAmdBlasZdotu clAmdBlasZdotu_ +#define clAmdBlasZdrot clAmdBlasZdrot_ +#define clAmdBlasZdscal clAmdBlasZdscal_ +#define clAmdBlasZgbmv clAmdBlasZgbmv_ +#define clAmdBlasZgemm clAmdBlasZgemm_ +#define clAmdBlasZgemmEx clAmdBlasZgemmEx_ +#define clAmdBlasZgemv clAmdBlasZgemv_ +#define clAmdBlasZgemvEx clAmdBlasZgemvEx_ +#define clAmdBlasZgerc clAmdBlasZgerc_ +#define clAmdBlasZgeru clAmdBlasZgeru_ +#define clAmdBlasZhbmv clAmdBlasZhbmv_ +#define clAmdBlasZhemm clAmdBlasZhemm_ +#define clAmdBlasZhemv clAmdBlasZhemv_ +#define clAmdBlasZher clAmdBlasZher_ +#define clAmdBlasZher2 clAmdBlasZher2_ +#define clAmdBlasZher2k clAmdBlasZher2k_ +#define clAmdBlasZherk clAmdBlasZherk_ +#define clAmdBlasZhpmv clAmdBlasZhpmv_ +#define clAmdBlasZhpr clAmdBlasZhpr_ +#define clAmdBlasZhpr2 clAmdBlasZhpr2_ +#define clAmdBlasZrotg clAmdBlasZrotg_ +#define clAmdBlasZscal clAmdBlasZscal_ +#define clAmdBlasZswap clAmdBlasZswap_ +#define clAmdBlasZsymm clAmdBlasZsymm_ +#define clAmdBlasZsyr2k clAmdBlasZsyr2k_ +#define clAmdBlasZsyr2kEx clAmdBlasZsyr2kEx_ +#define clAmdBlasZsyrk clAmdBlasZsyrk_ +#define clAmdBlasZsyrkEx clAmdBlasZsyrkEx_ +#define clAmdBlasZtbmv clAmdBlasZtbmv_ +#define clAmdBlasZtbsv clAmdBlasZtbsv_ +#define clAmdBlasZtpmv clAmdBlasZtpmv_ +#define clAmdBlasZtpsv clAmdBlasZtpsv_ +#define clAmdBlasZtrmm clAmdBlasZtrmm_ +#define clAmdBlasZtrmmEx clAmdBlasZtrmmEx_ +#define clAmdBlasZtrmv clAmdBlasZtrmv_ +#define clAmdBlasZtrsm clAmdBlasZtrsm_ +#define clAmdBlasZtrsmEx clAmdBlasZtrsmEx_ +#define clAmdBlasZtrsv clAmdBlasZtrsv_ +#define clAmdBlasiCamax clAmdBlasiCamax_ +#define clAmdBlasiDamax clAmdBlasiDamax_ +#define clAmdBlasiSamax clAmdBlasiSamax_ +#define clAmdBlasiZamax clAmdBlasiZamax_ + +#include + +// generated by parser_clamdblas.py +#undef clAmdBlasAddScratchImage +//#define clAmdBlasAddScratchImage clAmdBlasAddScratchImage_pfn +#undef clAmdBlasCaxpy +//#define clAmdBlasCaxpy clAmdBlasCaxpy_pfn +#undef clAmdBlasCcopy +//#define clAmdBlasCcopy clAmdBlasCcopy_pfn +#undef clAmdBlasCdotc +//#define clAmdBlasCdotc clAmdBlasCdotc_pfn +#undef clAmdBlasCdotu +//#define clAmdBlasCdotu clAmdBlasCdotu_pfn +#undef clAmdBlasCgbmv +//#define clAmdBlasCgbmv clAmdBlasCgbmv_pfn +#undef clAmdBlasCgemm +//#define clAmdBlasCgemm clAmdBlasCgemm_pfn +#undef clAmdBlasCgemmEx +#define clAmdBlasCgemmEx clAmdBlasCgemmEx_pfn +#undef clAmdBlasCgemv +//#define clAmdBlasCgemv clAmdBlasCgemv_pfn +#undef clAmdBlasCgemvEx +//#define clAmdBlasCgemvEx clAmdBlasCgemvEx_pfn +#undef clAmdBlasCgerc +//#define clAmdBlasCgerc clAmdBlasCgerc_pfn +#undef clAmdBlasCgeru +//#define clAmdBlasCgeru clAmdBlasCgeru_pfn +#undef clAmdBlasChbmv +//#define clAmdBlasChbmv clAmdBlasChbmv_pfn +#undef clAmdBlasChemm +//#define clAmdBlasChemm clAmdBlasChemm_pfn +#undef clAmdBlasChemv +//#define clAmdBlasChemv clAmdBlasChemv_pfn +#undef clAmdBlasCher +//#define clAmdBlasCher clAmdBlasCher_pfn +#undef clAmdBlasCher2 +//#define clAmdBlasCher2 clAmdBlasCher2_pfn +#undef clAmdBlasCher2k +//#define clAmdBlasCher2k clAmdBlasCher2k_pfn +#undef clAmdBlasCherk +//#define clAmdBlasCherk clAmdBlasCherk_pfn +#undef clAmdBlasChpmv +//#define clAmdBlasChpmv clAmdBlasChpmv_pfn +#undef clAmdBlasChpr +//#define clAmdBlasChpr clAmdBlasChpr_pfn +#undef clAmdBlasChpr2 +//#define clAmdBlasChpr2 clAmdBlasChpr2_pfn +#undef clAmdBlasCrotg +//#define clAmdBlasCrotg clAmdBlasCrotg_pfn +#undef clAmdBlasCscal +//#define clAmdBlasCscal clAmdBlasCscal_pfn +#undef clAmdBlasCsrot +//#define clAmdBlasCsrot clAmdBlasCsrot_pfn +#undef clAmdBlasCsscal +//#define clAmdBlasCsscal clAmdBlasCsscal_pfn +#undef clAmdBlasCswap +//#define clAmdBlasCswap clAmdBlasCswap_pfn +#undef clAmdBlasCsymm +//#define clAmdBlasCsymm clAmdBlasCsymm_pfn +#undef clAmdBlasCsyr2k +//#define clAmdBlasCsyr2k clAmdBlasCsyr2k_pfn +#undef clAmdBlasCsyr2kEx +//#define clAmdBlasCsyr2kEx clAmdBlasCsyr2kEx_pfn +#undef clAmdBlasCsyrk +//#define clAmdBlasCsyrk clAmdBlasCsyrk_pfn +#undef clAmdBlasCsyrkEx +//#define clAmdBlasCsyrkEx clAmdBlasCsyrkEx_pfn +#undef clAmdBlasCtbmv +//#define clAmdBlasCtbmv clAmdBlasCtbmv_pfn +#undef clAmdBlasCtbsv +//#define clAmdBlasCtbsv clAmdBlasCtbsv_pfn +#undef clAmdBlasCtpmv +//#define clAmdBlasCtpmv clAmdBlasCtpmv_pfn +#undef clAmdBlasCtpsv +//#define clAmdBlasCtpsv clAmdBlasCtpsv_pfn +#undef clAmdBlasCtrmm +//#define clAmdBlasCtrmm clAmdBlasCtrmm_pfn +#undef clAmdBlasCtrmmEx +//#define clAmdBlasCtrmmEx clAmdBlasCtrmmEx_pfn +#undef clAmdBlasCtrmv +//#define clAmdBlasCtrmv clAmdBlasCtrmv_pfn +#undef clAmdBlasCtrsm +//#define clAmdBlasCtrsm clAmdBlasCtrsm_pfn +#undef clAmdBlasCtrsmEx +//#define clAmdBlasCtrsmEx clAmdBlasCtrsmEx_pfn +#undef clAmdBlasCtrsv +//#define clAmdBlasCtrsv clAmdBlasCtrsv_pfn +#undef clAmdBlasDasum +//#define clAmdBlasDasum clAmdBlasDasum_pfn +#undef clAmdBlasDaxpy +//#define clAmdBlasDaxpy clAmdBlasDaxpy_pfn +#undef clAmdBlasDcopy +//#define clAmdBlasDcopy clAmdBlasDcopy_pfn +#undef clAmdBlasDdot +//#define clAmdBlasDdot clAmdBlasDdot_pfn +#undef clAmdBlasDgbmv +//#define clAmdBlasDgbmv clAmdBlasDgbmv_pfn +#undef clAmdBlasDgemm +//#define clAmdBlasDgemm clAmdBlasDgemm_pfn +#undef clAmdBlasDgemmEx +#define clAmdBlasDgemmEx clAmdBlasDgemmEx_pfn +#undef clAmdBlasDgemv +//#define clAmdBlasDgemv clAmdBlasDgemv_pfn +#undef clAmdBlasDgemvEx +//#define clAmdBlasDgemvEx clAmdBlasDgemvEx_pfn +#undef clAmdBlasDger +//#define clAmdBlasDger clAmdBlasDger_pfn +#undef clAmdBlasDnrm2 +//#define clAmdBlasDnrm2 clAmdBlasDnrm2_pfn +#undef clAmdBlasDrot +//#define clAmdBlasDrot clAmdBlasDrot_pfn +#undef clAmdBlasDrotg +//#define clAmdBlasDrotg clAmdBlasDrotg_pfn +#undef clAmdBlasDrotm +//#define clAmdBlasDrotm clAmdBlasDrotm_pfn +#undef clAmdBlasDrotmg +//#define clAmdBlasDrotmg clAmdBlasDrotmg_pfn +#undef clAmdBlasDsbmv +//#define clAmdBlasDsbmv clAmdBlasDsbmv_pfn +#undef clAmdBlasDscal +//#define clAmdBlasDscal clAmdBlasDscal_pfn +#undef clAmdBlasDspmv +//#define clAmdBlasDspmv clAmdBlasDspmv_pfn +#undef clAmdBlasDspr +//#define clAmdBlasDspr clAmdBlasDspr_pfn +#undef clAmdBlasDspr2 +//#define clAmdBlasDspr2 clAmdBlasDspr2_pfn +#undef clAmdBlasDswap +//#define clAmdBlasDswap clAmdBlasDswap_pfn +#undef clAmdBlasDsymm +//#define clAmdBlasDsymm clAmdBlasDsymm_pfn +#undef clAmdBlasDsymv +//#define clAmdBlasDsymv clAmdBlasDsymv_pfn +#undef clAmdBlasDsymvEx +//#define clAmdBlasDsymvEx clAmdBlasDsymvEx_pfn +#undef clAmdBlasDsyr +//#define clAmdBlasDsyr clAmdBlasDsyr_pfn +#undef clAmdBlasDsyr2 +//#define clAmdBlasDsyr2 clAmdBlasDsyr2_pfn +#undef clAmdBlasDsyr2k +//#define clAmdBlasDsyr2k clAmdBlasDsyr2k_pfn +#undef clAmdBlasDsyr2kEx +//#define clAmdBlasDsyr2kEx clAmdBlasDsyr2kEx_pfn +#undef clAmdBlasDsyrk +//#define clAmdBlasDsyrk clAmdBlasDsyrk_pfn +#undef clAmdBlasDsyrkEx +//#define clAmdBlasDsyrkEx clAmdBlasDsyrkEx_pfn +#undef clAmdBlasDtbmv +//#define clAmdBlasDtbmv clAmdBlasDtbmv_pfn +#undef clAmdBlasDtbsv +//#define clAmdBlasDtbsv clAmdBlasDtbsv_pfn +#undef clAmdBlasDtpmv +//#define clAmdBlasDtpmv clAmdBlasDtpmv_pfn +#undef clAmdBlasDtpsv +//#define clAmdBlasDtpsv clAmdBlasDtpsv_pfn +#undef clAmdBlasDtrmm +//#define clAmdBlasDtrmm clAmdBlasDtrmm_pfn +#undef clAmdBlasDtrmmEx +//#define clAmdBlasDtrmmEx clAmdBlasDtrmmEx_pfn +#undef clAmdBlasDtrmv +//#define clAmdBlasDtrmv clAmdBlasDtrmv_pfn +#undef clAmdBlasDtrsm +//#define clAmdBlasDtrsm clAmdBlasDtrsm_pfn +#undef clAmdBlasDtrsmEx +//#define clAmdBlasDtrsmEx clAmdBlasDtrsmEx_pfn +#undef clAmdBlasDtrsv +//#define clAmdBlasDtrsv clAmdBlasDtrsv_pfn +#undef clAmdBlasDzasum +//#define clAmdBlasDzasum clAmdBlasDzasum_pfn +#undef clAmdBlasDznrm2 +//#define clAmdBlasDznrm2 clAmdBlasDznrm2_pfn +#undef clAmdBlasGetVersion +//#define clAmdBlasGetVersion clAmdBlasGetVersion_pfn +#undef clAmdBlasRemoveScratchImage +//#define clAmdBlasRemoveScratchImage clAmdBlasRemoveScratchImage_pfn +#undef clAmdBlasSasum +//#define clAmdBlasSasum clAmdBlasSasum_pfn +#undef clAmdBlasSaxpy +//#define clAmdBlasSaxpy clAmdBlasSaxpy_pfn +#undef clAmdBlasScasum +//#define clAmdBlasScasum clAmdBlasScasum_pfn +#undef clAmdBlasScnrm2 +//#define clAmdBlasScnrm2 clAmdBlasScnrm2_pfn +#undef clAmdBlasScopy +//#define clAmdBlasScopy clAmdBlasScopy_pfn +#undef clAmdBlasSdot +//#define clAmdBlasSdot clAmdBlasSdot_pfn +#undef clAmdBlasSetup +#define clAmdBlasSetup clAmdBlasSetup_pfn +#undef clAmdBlasSgbmv +//#define clAmdBlasSgbmv clAmdBlasSgbmv_pfn +#undef clAmdBlasSgemm +//#define clAmdBlasSgemm clAmdBlasSgemm_pfn +#undef clAmdBlasSgemmEx +#define clAmdBlasSgemmEx clAmdBlasSgemmEx_pfn +#undef clAmdBlasSgemv +//#define clAmdBlasSgemv clAmdBlasSgemv_pfn +#undef clAmdBlasSgemvEx +//#define clAmdBlasSgemvEx clAmdBlasSgemvEx_pfn +#undef clAmdBlasSger +//#define clAmdBlasSger clAmdBlasSger_pfn +#undef clAmdBlasSnrm2 +//#define clAmdBlasSnrm2 clAmdBlasSnrm2_pfn +#undef clAmdBlasSrot +//#define clAmdBlasSrot clAmdBlasSrot_pfn +#undef clAmdBlasSrotg +//#define clAmdBlasSrotg clAmdBlasSrotg_pfn +#undef clAmdBlasSrotm +//#define clAmdBlasSrotm clAmdBlasSrotm_pfn +#undef clAmdBlasSrotmg +//#define clAmdBlasSrotmg clAmdBlasSrotmg_pfn +#undef clAmdBlasSsbmv +//#define clAmdBlasSsbmv clAmdBlasSsbmv_pfn +#undef clAmdBlasSscal +//#define clAmdBlasSscal clAmdBlasSscal_pfn +#undef clAmdBlasSspmv +//#define clAmdBlasSspmv clAmdBlasSspmv_pfn +#undef clAmdBlasSspr +//#define clAmdBlasSspr clAmdBlasSspr_pfn +#undef clAmdBlasSspr2 +//#define clAmdBlasSspr2 clAmdBlasSspr2_pfn +#undef clAmdBlasSswap +//#define clAmdBlasSswap clAmdBlasSswap_pfn +#undef clAmdBlasSsymm +//#define clAmdBlasSsymm clAmdBlasSsymm_pfn +#undef clAmdBlasSsymv +//#define clAmdBlasSsymv clAmdBlasSsymv_pfn +#undef clAmdBlasSsymvEx +//#define clAmdBlasSsymvEx clAmdBlasSsymvEx_pfn +#undef clAmdBlasSsyr +//#define clAmdBlasSsyr clAmdBlasSsyr_pfn +#undef clAmdBlasSsyr2 +//#define clAmdBlasSsyr2 clAmdBlasSsyr2_pfn +#undef clAmdBlasSsyr2k +//#define clAmdBlasSsyr2k clAmdBlasSsyr2k_pfn +#undef clAmdBlasSsyr2kEx +//#define clAmdBlasSsyr2kEx clAmdBlasSsyr2kEx_pfn +#undef clAmdBlasSsyrk +//#define clAmdBlasSsyrk clAmdBlasSsyrk_pfn +#undef clAmdBlasSsyrkEx +//#define clAmdBlasSsyrkEx clAmdBlasSsyrkEx_pfn +#undef clAmdBlasStbmv +//#define clAmdBlasStbmv clAmdBlasStbmv_pfn +#undef clAmdBlasStbsv +//#define clAmdBlasStbsv clAmdBlasStbsv_pfn +#undef clAmdBlasStpmv +//#define clAmdBlasStpmv clAmdBlasStpmv_pfn +#undef clAmdBlasStpsv +//#define clAmdBlasStpsv clAmdBlasStpsv_pfn +#undef clAmdBlasStrmm +//#define clAmdBlasStrmm clAmdBlasStrmm_pfn +#undef clAmdBlasStrmmEx +//#define clAmdBlasStrmmEx clAmdBlasStrmmEx_pfn +#undef clAmdBlasStrmv +//#define clAmdBlasStrmv clAmdBlasStrmv_pfn +#undef clAmdBlasStrsm +//#define clAmdBlasStrsm clAmdBlasStrsm_pfn +#undef clAmdBlasStrsmEx +//#define clAmdBlasStrsmEx clAmdBlasStrsmEx_pfn +#undef clAmdBlasStrsv +//#define clAmdBlasStrsv clAmdBlasStrsv_pfn +#undef clAmdBlasTeardown +#define clAmdBlasTeardown clAmdBlasTeardown_pfn +#undef clAmdBlasZaxpy +//#define clAmdBlasZaxpy clAmdBlasZaxpy_pfn +#undef clAmdBlasZcopy +//#define clAmdBlasZcopy clAmdBlasZcopy_pfn +#undef clAmdBlasZdotc +//#define clAmdBlasZdotc clAmdBlasZdotc_pfn +#undef clAmdBlasZdotu +//#define clAmdBlasZdotu clAmdBlasZdotu_pfn +#undef clAmdBlasZdrot +//#define clAmdBlasZdrot clAmdBlasZdrot_pfn +#undef clAmdBlasZdscal +//#define clAmdBlasZdscal clAmdBlasZdscal_pfn +#undef clAmdBlasZgbmv +//#define clAmdBlasZgbmv clAmdBlasZgbmv_pfn +#undef clAmdBlasZgemm +//#define clAmdBlasZgemm clAmdBlasZgemm_pfn +#undef clAmdBlasZgemmEx +#define clAmdBlasZgemmEx clAmdBlasZgemmEx_pfn +#undef clAmdBlasZgemv +//#define clAmdBlasZgemv clAmdBlasZgemv_pfn +#undef clAmdBlasZgemvEx +//#define clAmdBlasZgemvEx clAmdBlasZgemvEx_pfn +#undef clAmdBlasZgerc +//#define clAmdBlasZgerc clAmdBlasZgerc_pfn +#undef clAmdBlasZgeru +//#define clAmdBlasZgeru clAmdBlasZgeru_pfn +#undef clAmdBlasZhbmv +//#define clAmdBlasZhbmv clAmdBlasZhbmv_pfn +#undef clAmdBlasZhemm +//#define clAmdBlasZhemm clAmdBlasZhemm_pfn +#undef clAmdBlasZhemv +//#define clAmdBlasZhemv clAmdBlasZhemv_pfn +#undef clAmdBlasZher +//#define clAmdBlasZher clAmdBlasZher_pfn +#undef clAmdBlasZher2 +//#define clAmdBlasZher2 clAmdBlasZher2_pfn +#undef clAmdBlasZher2k +//#define clAmdBlasZher2k clAmdBlasZher2k_pfn +#undef clAmdBlasZherk +//#define clAmdBlasZherk clAmdBlasZherk_pfn +#undef clAmdBlasZhpmv +//#define clAmdBlasZhpmv clAmdBlasZhpmv_pfn +#undef clAmdBlasZhpr +//#define clAmdBlasZhpr clAmdBlasZhpr_pfn +#undef clAmdBlasZhpr2 +//#define clAmdBlasZhpr2 clAmdBlasZhpr2_pfn +#undef clAmdBlasZrotg +//#define clAmdBlasZrotg clAmdBlasZrotg_pfn +#undef clAmdBlasZscal +//#define clAmdBlasZscal clAmdBlasZscal_pfn +#undef clAmdBlasZswap +//#define clAmdBlasZswap clAmdBlasZswap_pfn +#undef clAmdBlasZsymm +//#define clAmdBlasZsymm clAmdBlasZsymm_pfn +#undef clAmdBlasZsyr2k +//#define clAmdBlasZsyr2k clAmdBlasZsyr2k_pfn +#undef clAmdBlasZsyr2kEx +//#define clAmdBlasZsyr2kEx clAmdBlasZsyr2kEx_pfn +#undef clAmdBlasZsyrk +//#define clAmdBlasZsyrk clAmdBlasZsyrk_pfn +#undef clAmdBlasZsyrkEx +//#define clAmdBlasZsyrkEx clAmdBlasZsyrkEx_pfn +#undef clAmdBlasZtbmv +//#define clAmdBlasZtbmv clAmdBlasZtbmv_pfn +#undef clAmdBlasZtbsv +//#define clAmdBlasZtbsv clAmdBlasZtbsv_pfn +#undef clAmdBlasZtpmv +//#define clAmdBlasZtpmv clAmdBlasZtpmv_pfn +#undef clAmdBlasZtpsv +//#define clAmdBlasZtpsv clAmdBlasZtpsv_pfn +#undef clAmdBlasZtrmm +//#define clAmdBlasZtrmm clAmdBlasZtrmm_pfn +#undef clAmdBlasZtrmmEx +//#define clAmdBlasZtrmmEx clAmdBlasZtrmmEx_pfn +#undef clAmdBlasZtrmv +//#define clAmdBlasZtrmv clAmdBlasZtrmv_pfn +#undef clAmdBlasZtrsm +//#define clAmdBlasZtrsm clAmdBlasZtrsm_pfn +#undef clAmdBlasZtrsmEx +//#define clAmdBlasZtrsmEx clAmdBlasZtrsmEx_pfn +#undef clAmdBlasZtrsv +//#define clAmdBlasZtrsv clAmdBlasZtrsv_pfn +#undef clAmdBlasiCamax +//#define clAmdBlasiCamax clAmdBlasiCamax_pfn +#undef clAmdBlasiDamax +//#define clAmdBlasiDamax clAmdBlasiDamax_pfn +#undef clAmdBlasiSamax +//#define clAmdBlasiSamax clAmdBlasiSamax_pfn +#undef clAmdBlasiZamax +//#define clAmdBlasiZamax clAmdBlasiZamax_pfn + +// generated by parser_clamdblas.py +//extern CL_RUNTIME_EXPORT cl_ulong (*clAmdBlasAddScratchImage)(cl_context context, size_t width, size_t height, clAmdBlasStatus* status); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCaxpy)(size_t N, cl_float2 alpha, const cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCcopy)(size_t N, const cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCdotc)(size_t N, cl_mem dotProduct, size_t offDP, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCdotu)(size_t N, cl_mem dotProduct, size_t offDP, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCgbmv)(clAmdBlasOrder order, clAmdBlasTranspose trans, size_t M, size_t N, size_t KL, size_t KU, cl_float2 alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem X, size_t offx, int incx, cl_float2 beta, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCgemm)(clAmdBlasOrder order, clAmdBlasTranspose transA, clAmdBlasTranspose transB, size_t M, size_t N, size_t K, FloatComplex alpha, const cl_mem A, size_t lda, const cl_mem B, size_t ldb, FloatComplex beta, cl_mem C, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCgemmEx)(clAmdBlasOrder order, clAmdBlasTranspose transA, clAmdBlasTranspose transB, size_t M, size_t N, size_t K, FloatComplex alpha, const cl_mem A, size_t offA, size_t lda, const cl_mem B, size_t offB, size_t ldb, FloatComplex beta, cl_mem C, size_t offC, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCgemv)(clAmdBlasOrder order, clAmdBlasTranspose transA, size_t M, size_t N, FloatComplex alpha, const cl_mem A, size_t lda, const cl_mem x, size_t offx, int incx, FloatComplex beta, cl_mem y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCgemvEx)(clAmdBlasOrder order, clAmdBlasTranspose transA, size_t M, size_t N, FloatComplex alpha, const cl_mem A, size_t offA, size_t lda, const cl_mem x, size_t offx, int incx, FloatComplex beta, cl_mem y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCgerc)(clAmdBlasOrder order, size_t M, size_t N, cl_float2 alpha, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem A, size_t offa, size_t lda, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCgeru)(clAmdBlasOrder order, size_t M, size_t N, cl_float2 alpha, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem A, size_t offa, size_t lda, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasChbmv)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, size_t K, cl_float2 alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem X, size_t offx, int incx, cl_float2 beta, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasChemm)(clAmdBlasOrder order, clAmdBlasSide side, clAmdBlasUplo uplo, size_t M, size_t N, cl_float2 alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem B, size_t offb, size_t ldb, cl_float2 beta, cl_mem C, size_t offc, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasChemv)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, FloatComplex alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem X, size_t offx, int incx, FloatComplex beta, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCher)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_float alpha, const cl_mem X, size_t offx, int incx, cl_mem A, size_t offa, size_t lda, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCher2)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_float2 alpha, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem A, size_t offa, size_t lda, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCher2k)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, size_t N, size_t K, FloatComplex alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem B, size_t offb, size_t ldb, cl_float beta, cl_mem C, size_t offc, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCherk)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose transA, size_t N, size_t K, float alpha, const cl_mem A, size_t offa, size_t lda, float beta, cl_mem C, size_t offc, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasChpmv)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_float2 alpha, const cl_mem AP, size_t offa, const cl_mem X, size_t offx, int incx, cl_float2 beta, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasChpr)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_float alpha, const cl_mem X, size_t offx, int incx, cl_mem AP, size_t offa, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasChpr2)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_float2 alpha, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem AP, size_t offa, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCrotg)(cl_mem CA, size_t offCA, cl_mem CB, size_t offCB, cl_mem C, size_t offC, cl_mem S, size_t offS, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCscal)(size_t N, cl_float2 alpha, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCsrot)(size_t N, cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_float C, cl_float S, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCsscal)(size_t N, cl_float alpha, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCswap)(size_t N, cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCsymm)(clAmdBlasOrder order, clAmdBlasSide side, clAmdBlasUplo uplo, size_t M, size_t N, cl_float2 alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem B, size_t offb, size_t ldb, cl_float2 beta, cl_mem C, size_t offc, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCsyr2k)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose transAB, size_t N, size_t K, FloatComplex alpha, const cl_mem A, size_t lda, const cl_mem B, size_t ldb, FloatComplex beta, cl_mem C, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCsyr2kEx)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose transAB, size_t N, size_t K, FloatComplex alpha, const cl_mem A, size_t offA, size_t lda, const cl_mem B, size_t offB, size_t ldb, FloatComplex beta, cl_mem C, size_t offC, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCsyrk)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose transA, size_t N, size_t K, FloatComplex alpha, const cl_mem A, size_t lda, FloatComplex beta, cl_mem C, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCsyrkEx)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose transA, size_t N, size_t K, FloatComplex alpha, const cl_mem A, size_t offA, size_t lda, FloatComplex beta, cl_mem C, size_t offC, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCtbmv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, size_t K, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCtbsv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, size_t K, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCtpmv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, const cl_mem AP, size_t offa, cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCtpsv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, const cl_mem A, size_t offa, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCtrmm)(clAmdBlasOrder order, clAmdBlasSide side, clAmdBlasUplo uplo, clAmdBlasTranspose transA, clAmdBlasDiag diag, size_t M, size_t N, FloatComplex alpha, const cl_mem A, size_t lda, cl_mem B, size_t ldb, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCtrmmEx)(clAmdBlasOrder order, clAmdBlasSide side, clAmdBlasUplo uplo, clAmdBlasTranspose transA, clAmdBlasDiag diag, size_t M, size_t N, FloatComplex alpha, const cl_mem A, size_t offA, size_t lda, cl_mem B, size_t offB, size_t ldb, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCtrmv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCtrsm)(clAmdBlasOrder order, clAmdBlasSide side, clAmdBlasUplo uplo, clAmdBlasTranspose transA, clAmdBlasDiag diag, size_t M, size_t N, FloatComplex alpha, const cl_mem A, size_t lda, cl_mem B, size_t ldb, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCtrsmEx)(clAmdBlasOrder order, clAmdBlasSide side, clAmdBlasUplo uplo, clAmdBlasTranspose transA, clAmdBlasDiag diag, size_t M, size_t N, FloatComplex alpha, const cl_mem A, size_t offA, size_t lda, cl_mem B, size_t offB, size_t ldb, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCtrsv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDasum)(size_t N, cl_mem asum, size_t offAsum, const cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDaxpy)(size_t N, cl_double alpha, const cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDcopy)(size_t N, const cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDdot)(size_t N, cl_mem dotProduct, size_t offDP, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDgbmv)(clAmdBlasOrder order, clAmdBlasTranspose trans, size_t M, size_t N, size_t KL, size_t KU, cl_double alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem X, size_t offx, int incx, cl_double beta, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDgemm)(clAmdBlasOrder order, clAmdBlasTranspose transA, clAmdBlasTranspose transB, size_t M, size_t N, size_t K, cl_double alpha, const cl_mem A, size_t lda, const cl_mem B, size_t ldb, cl_double beta, cl_mem C, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDgemmEx)(clAmdBlasOrder order, clAmdBlasTranspose transA, clAmdBlasTranspose transB, size_t M, size_t N, size_t K, cl_double alpha, const cl_mem A, size_t offA, size_t lda, const cl_mem B, size_t offB, size_t ldb, cl_double beta, cl_mem C, size_t offC, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDgemv)(clAmdBlasOrder order, clAmdBlasTranspose transA, size_t M, size_t N, cl_double alpha, const cl_mem A, size_t lda, const cl_mem x, size_t offx, int incx, cl_double beta, cl_mem y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDgemvEx)(clAmdBlasOrder order, clAmdBlasTranspose transA, size_t M, size_t N, cl_double alpha, const cl_mem A, size_t offA, size_t lda, const cl_mem x, size_t offx, int incx, cl_double beta, cl_mem y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDger)(clAmdBlasOrder order, size_t M, size_t N, cl_double alpha, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem A, size_t offa, size_t lda, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDnrm2)(size_t N, cl_mem NRM2, size_t offNRM2, const cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDrot)(size_t N, cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_double C, cl_double S, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDrotg)(cl_mem DA, size_t offDA, cl_mem DB, size_t offDB, cl_mem C, size_t offC, cl_mem S, size_t offS, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDrotm)(size_t N, cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, const cl_mem DPARAM, size_t offDparam, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDrotmg)(cl_mem DD1, size_t offDD1, cl_mem DD2, size_t offDD2, cl_mem DX1, size_t offDX1, const cl_mem DY1, size_t offDY1, cl_mem DPARAM, size_t offDparam, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDsbmv)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, size_t K, cl_double alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem X, size_t offx, int incx, cl_double beta, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDscal)(size_t N, cl_double alpha, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDspmv)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_double alpha, const cl_mem AP, size_t offa, const cl_mem X, size_t offx, int incx, cl_double beta, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDspr)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_double alpha, const cl_mem X, size_t offx, int incx, cl_mem AP, size_t offa, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDspr2)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_double alpha, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem AP, size_t offa, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDswap)(size_t N, cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDsymm)(clAmdBlasOrder order, clAmdBlasSide side, clAmdBlasUplo uplo, size_t M, size_t N, cl_double alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem B, size_t offb, size_t ldb, cl_double beta, cl_mem C, size_t offc, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDsymv)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_double alpha, const cl_mem A, size_t lda, const cl_mem x, size_t offx, int incx, cl_double beta, cl_mem y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDsymvEx)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_double alpha, const cl_mem A, size_t offA, size_t lda, const cl_mem x, size_t offx, int incx, cl_double beta, cl_mem y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDsyr)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_double alpha, const cl_mem X, size_t offx, int incx, cl_mem A, size_t offa, size_t lda, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDsyr2)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_double alpha, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem A, size_t offa, size_t lda, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDsyr2k)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose transAB, size_t N, size_t K, cl_double alpha, const cl_mem A, size_t lda, const cl_mem B, size_t ldb, cl_double beta, cl_mem C, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDsyr2kEx)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose transAB, size_t N, size_t K, cl_double alpha, const cl_mem A, size_t offA, size_t lda, const cl_mem B, size_t offB, size_t ldb, cl_double beta, cl_mem C, size_t offC, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDsyrk)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose transA, size_t N, size_t K, cl_double alpha, const cl_mem A, size_t lda, cl_double beta, cl_mem C, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDsyrkEx)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose transA, size_t N, size_t K, cl_double alpha, const cl_mem A, size_t offA, size_t lda, cl_double beta, cl_mem C, size_t offC, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDtbmv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, size_t K, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDtbsv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, size_t K, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDtpmv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, const cl_mem AP, size_t offa, cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDtpsv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, const cl_mem A, size_t offa, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDtrmm)(clAmdBlasOrder order, clAmdBlasSide side, clAmdBlasUplo uplo, clAmdBlasTranspose transA, clAmdBlasDiag diag, size_t M, size_t N, cl_double alpha, const cl_mem A, size_t lda, cl_mem B, size_t ldb, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDtrmmEx)(clAmdBlasOrder order, clAmdBlasSide side, clAmdBlasUplo uplo, clAmdBlasTranspose transA, clAmdBlasDiag diag, size_t M, size_t N, cl_double alpha, const cl_mem A, size_t offA, size_t lda, cl_mem B, size_t offB, size_t ldb, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDtrmv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDtrsm)(clAmdBlasOrder order, clAmdBlasSide side, clAmdBlasUplo uplo, clAmdBlasTranspose transA, clAmdBlasDiag diag, size_t M, size_t N, cl_double alpha, const cl_mem A, size_t lda, cl_mem B, size_t ldb, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDtrsmEx)(clAmdBlasOrder order, clAmdBlasSide side, clAmdBlasUplo uplo, clAmdBlasTranspose transA, clAmdBlasDiag diag, size_t M, size_t N, cl_double alpha, const cl_mem A, size_t offA, size_t lda, cl_mem B, size_t offB, size_t ldb, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDtrsv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDzasum)(size_t N, cl_mem asum, size_t offAsum, const cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDznrm2)(size_t N, cl_mem NRM2, size_t offNRM2, const cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasGetVersion)(cl_uint* major, cl_uint* minor, cl_uint* patch); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasRemoveScratchImage)(cl_ulong imageID); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSasum)(size_t N, cl_mem asum, size_t offAsum, const cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSaxpy)(size_t N, cl_float alpha, const cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasScasum)(size_t N, cl_mem asum, size_t offAsum, const cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasScnrm2)(size_t N, cl_mem NRM2, size_t offNRM2, const cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasScopy)(size_t N, const cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSdot)(size_t N, cl_mem dotProduct, size_t offDP, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSetup)(); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSgbmv)(clAmdBlasOrder order, clAmdBlasTranspose trans, size_t M, size_t N, size_t KL, size_t KU, cl_float alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem X, size_t offx, int incx, cl_float beta, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSgemm)(clAmdBlasOrder order, clAmdBlasTranspose transA, clAmdBlasTranspose transB, size_t M, size_t N, size_t K, cl_float alpha, const cl_mem A, size_t lda, const cl_mem B, size_t ldb, cl_float beta, cl_mem C, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSgemmEx)(clAmdBlasOrder order, clAmdBlasTranspose transA, clAmdBlasTranspose transB, size_t M, size_t N, size_t K, cl_float alpha, const cl_mem A, size_t offA, size_t lda, const cl_mem B, size_t offB, size_t ldb, cl_float beta, cl_mem C, size_t offC, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSgemv)(clAmdBlasOrder order, clAmdBlasTranspose transA, size_t M, size_t N, cl_float alpha, const cl_mem A, size_t lda, const cl_mem x, size_t offx, int incx, cl_float beta, cl_mem y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSgemvEx)(clAmdBlasOrder order, clAmdBlasTranspose transA, size_t M, size_t N, cl_float alpha, const cl_mem A, size_t offA, size_t lda, const cl_mem x, size_t offx, int incx, cl_float beta, cl_mem y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSger)(clAmdBlasOrder order, size_t M, size_t N, cl_float alpha, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem A, size_t offa, size_t lda, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSnrm2)(size_t N, cl_mem NRM2, size_t offNRM2, const cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSrot)(size_t N, cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_float C, cl_float S, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSrotg)(cl_mem SA, size_t offSA, cl_mem SB, size_t offSB, cl_mem C, size_t offC, cl_mem S, size_t offS, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSrotm)(size_t N, cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, const cl_mem SPARAM, size_t offSparam, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSrotmg)(cl_mem SD1, size_t offSD1, cl_mem SD2, size_t offSD2, cl_mem SX1, size_t offSX1, const cl_mem SY1, size_t offSY1, cl_mem SPARAM, size_t offSparam, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSsbmv)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, size_t K, cl_float alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem X, size_t offx, int incx, cl_float beta, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSscal)(size_t N, cl_float alpha, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSspmv)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_float alpha, const cl_mem AP, size_t offa, const cl_mem X, size_t offx, int incx, cl_float beta, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSspr)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_float alpha, const cl_mem X, size_t offx, int incx, cl_mem AP, size_t offa, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSspr2)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_float alpha, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem AP, size_t offa, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSswap)(size_t N, cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSsymm)(clAmdBlasOrder order, clAmdBlasSide side, clAmdBlasUplo uplo, size_t M, size_t N, cl_float alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem B, size_t offb, size_t ldb, cl_float beta, cl_mem C, size_t offc, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSsymv)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_float alpha, const cl_mem A, size_t lda, const cl_mem x, size_t offx, int incx, cl_float beta, cl_mem y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSsymvEx)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_float alpha, const cl_mem A, size_t offA, size_t lda, const cl_mem x, size_t offx, int incx, cl_float beta, cl_mem y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSsyr)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_float alpha, const cl_mem X, size_t offx, int incx, cl_mem A, size_t offa, size_t lda, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSsyr2)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_float alpha, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem A, size_t offa, size_t lda, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSsyr2k)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose transAB, size_t N, size_t K, cl_float alpha, const cl_mem A, size_t lda, const cl_mem B, size_t ldb, cl_float beta, cl_mem C, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSsyr2kEx)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose transAB, size_t N, size_t K, cl_float alpha, const cl_mem A, size_t offA, size_t lda, const cl_mem B, size_t offB, size_t ldb, cl_float beta, cl_mem C, size_t offC, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSsyrk)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose transA, size_t N, size_t K, cl_float alpha, const cl_mem A, size_t lda, cl_float beta, cl_mem C, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSsyrkEx)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose transA, size_t N, size_t K, cl_float alpha, const cl_mem A, size_t offA, size_t lda, cl_float beta, cl_mem C, size_t offC, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasStbmv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, size_t K, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasStbsv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, size_t K, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasStpmv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, const cl_mem AP, size_t offa, cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasStpsv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, const cl_mem A, size_t offa, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasStrmm)(clAmdBlasOrder order, clAmdBlasSide side, clAmdBlasUplo uplo, clAmdBlasTranspose transA, clAmdBlasDiag diag, size_t M, size_t N, cl_float alpha, const cl_mem A, size_t lda, cl_mem B, size_t ldb, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasStrmmEx)(clAmdBlasOrder order, clAmdBlasSide side, clAmdBlasUplo uplo, clAmdBlasTranspose transA, clAmdBlasDiag diag, size_t M, size_t N, cl_float alpha, const cl_mem A, size_t offA, size_t lda, cl_mem B, size_t offB, size_t ldb, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasStrmv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasStrsm)(clAmdBlasOrder order, clAmdBlasSide side, clAmdBlasUplo uplo, clAmdBlasTranspose transA, clAmdBlasDiag diag, size_t M, size_t N, cl_float alpha, const cl_mem A, size_t lda, cl_mem B, size_t ldb, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasStrsmEx)(clAmdBlasOrder order, clAmdBlasSide side, clAmdBlasUplo uplo, clAmdBlasTranspose transA, clAmdBlasDiag diag, size_t M, size_t N, cl_float alpha, const cl_mem A, size_t offA, size_t lda, cl_mem B, size_t offB, size_t ldb, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasStrsv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT void (*clAmdBlasTeardown)(); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZaxpy)(size_t N, cl_double2 alpha, const cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZcopy)(size_t N, const cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZdotc)(size_t N, cl_mem dotProduct, size_t offDP, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZdotu)(size_t N, cl_mem dotProduct, size_t offDP, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZdrot)(size_t N, cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_double C, cl_double S, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZdscal)(size_t N, cl_double alpha, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZgbmv)(clAmdBlasOrder order, clAmdBlasTranspose trans, size_t M, size_t N, size_t KL, size_t KU, cl_double2 alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem X, size_t offx, int incx, cl_double2 beta, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZgemm)(clAmdBlasOrder order, clAmdBlasTranspose transA, clAmdBlasTranspose transB, size_t M, size_t N, size_t K, DoubleComplex alpha, const cl_mem A, size_t lda, const cl_mem B, size_t ldb, DoubleComplex beta, cl_mem C, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZgemmEx)(clAmdBlasOrder order, clAmdBlasTranspose transA, clAmdBlasTranspose transB, size_t M, size_t N, size_t K, DoubleComplex alpha, const cl_mem A, size_t offA, size_t lda, const cl_mem B, size_t offB, size_t ldb, DoubleComplex beta, cl_mem C, size_t offC, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZgemv)(clAmdBlasOrder order, clAmdBlasTranspose transA, size_t M, size_t N, DoubleComplex alpha, const cl_mem A, size_t lda, const cl_mem x, size_t offx, int incx, DoubleComplex beta, cl_mem y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZgemvEx)(clAmdBlasOrder order, clAmdBlasTranspose transA, size_t M, size_t N, DoubleComplex alpha, const cl_mem A, size_t offA, size_t lda, const cl_mem x, size_t offx, int incx, DoubleComplex beta, cl_mem y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZgerc)(clAmdBlasOrder order, size_t M, size_t N, cl_double2 alpha, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem A, size_t offa, size_t lda, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZgeru)(clAmdBlasOrder order, size_t M, size_t N, cl_double2 alpha, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem A, size_t offa, size_t lda, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZhbmv)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, size_t K, cl_double2 alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem X, size_t offx, int incx, cl_double2 beta, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZhemm)(clAmdBlasOrder order, clAmdBlasSide side, clAmdBlasUplo uplo, size_t M, size_t N, cl_double2 alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem B, size_t offb, size_t ldb, cl_double2 beta, cl_mem C, size_t offc, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZhemv)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, DoubleComplex alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem X, size_t offx, int incx, DoubleComplex beta, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZher)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_double alpha, const cl_mem X, size_t offx, int incx, cl_mem A, size_t offa, size_t lda, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZher2)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_double2 alpha, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem A, size_t offa, size_t lda, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZher2k)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, size_t N, size_t K, DoubleComplex alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem B, size_t offb, size_t ldb, cl_double beta, cl_mem C, size_t offc, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZherk)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose transA, size_t N, size_t K, double alpha, const cl_mem A, size_t offa, size_t lda, double beta, cl_mem C, size_t offc, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZhpmv)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_double2 alpha, const cl_mem AP, size_t offa, const cl_mem X, size_t offx, int incx, cl_double2 beta, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZhpr)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_double alpha, const cl_mem X, size_t offx, int incx, cl_mem AP, size_t offa, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZhpr2)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_double2 alpha, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem AP, size_t offa, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZrotg)(cl_mem CA, size_t offCA, cl_mem CB, size_t offCB, cl_mem C, size_t offC, cl_mem S, size_t offS, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZscal)(size_t N, cl_double2 alpha, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZswap)(size_t N, cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZsymm)(clAmdBlasOrder order, clAmdBlasSide side, clAmdBlasUplo uplo, size_t M, size_t N, cl_double2 alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem B, size_t offb, size_t ldb, cl_double2 beta, cl_mem C, size_t offc, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZsyr2k)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose transAB, size_t N, size_t K, DoubleComplex alpha, const cl_mem A, size_t lda, const cl_mem B, size_t ldb, DoubleComplex beta, cl_mem C, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZsyr2kEx)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose transAB, size_t N, size_t K, DoubleComplex alpha, const cl_mem A, size_t offA, size_t lda, const cl_mem B, size_t offB, size_t ldb, DoubleComplex beta, cl_mem C, size_t offC, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZsyrk)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose transA, size_t N, size_t K, DoubleComplex alpha, const cl_mem A, size_t lda, DoubleComplex beta, cl_mem C, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZsyrkEx)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose transA, size_t N, size_t K, DoubleComplex alpha, const cl_mem A, size_t offA, size_t lda, DoubleComplex beta, cl_mem C, size_t offC, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZtbmv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, size_t K, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZtbsv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, size_t K, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZtpmv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, const cl_mem AP, size_t offa, cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZtpsv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, const cl_mem A, size_t offa, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZtrmm)(clAmdBlasOrder order, clAmdBlasSide side, clAmdBlasUplo uplo, clAmdBlasTranspose transA, clAmdBlasDiag diag, size_t M, size_t N, DoubleComplex alpha, const cl_mem A, size_t lda, cl_mem B, size_t ldb, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZtrmmEx)(clAmdBlasOrder order, clAmdBlasSide side, clAmdBlasUplo uplo, clAmdBlasTranspose transA, clAmdBlasDiag diag, size_t M, size_t N, DoubleComplex alpha, const cl_mem A, size_t offA, size_t lda, cl_mem B, size_t offB, size_t ldb, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZtrmv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZtrsm)(clAmdBlasOrder order, clAmdBlasSide side, clAmdBlasUplo uplo, clAmdBlasTranspose transA, clAmdBlasDiag diag, size_t M, size_t N, DoubleComplex alpha, const cl_mem A, size_t lda, cl_mem B, size_t ldb, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZtrsmEx)(clAmdBlasOrder order, clAmdBlasSide side, clAmdBlasUplo uplo, clAmdBlasTranspose transA, clAmdBlasDiag diag, size_t M, size_t N, DoubleComplex alpha, const cl_mem A, size_t offA, size_t lda, cl_mem B, size_t offB, size_t ldb, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZtrsv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasiCamax)(size_t N, cl_mem iMax, size_t offiMax, const cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasiDamax)(size_t N, cl_mem iMax, size_t offiMax, const cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasiSamax)(size_t N, cl_mem iMax, size_t offiMax, const cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasiZamax)(size_t N, cl_mem iMax, size_t offiMax, const cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/opencl/runtime/autogenerated/opencl_clamdfft.hpp b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/opencl/runtime/autogenerated/opencl_clamdfft.hpp new file mode 100644 index 0000000..1457d7e --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/opencl/runtime/autogenerated/opencl_clamdfft.hpp @@ -0,0 +1,142 @@ +// +// AUTOGENERATED, DO NOT EDIT +// +#ifndef OPENCV_CORE_OCL_RUNTIME_CLAMDFFT_HPP +#error "Invalid usage" +#endif + +// generated by parser_clamdfft.py +#define clAmdFftBakePlan clAmdFftBakePlan_ +#define clAmdFftCopyPlan clAmdFftCopyPlan_ +#define clAmdFftCreateDefaultPlan clAmdFftCreateDefaultPlan_ +#define clAmdFftDestroyPlan clAmdFftDestroyPlan_ +#define clAmdFftEnqueueTransform clAmdFftEnqueueTransform_ +#define clAmdFftGetLayout clAmdFftGetLayout_ +#define clAmdFftGetPlanBatchSize clAmdFftGetPlanBatchSize_ +#define clAmdFftGetPlanContext clAmdFftGetPlanContext_ +#define clAmdFftGetPlanDim clAmdFftGetPlanDim_ +#define clAmdFftGetPlanDistance clAmdFftGetPlanDistance_ +#define clAmdFftGetPlanInStride clAmdFftGetPlanInStride_ +#define clAmdFftGetPlanLength clAmdFftGetPlanLength_ +#define clAmdFftGetPlanOutStride clAmdFftGetPlanOutStride_ +#define clAmdFftGetPlanPrecision clAmdFftGetPlanPrecision_ +#define clAmdFftGetPlanScale clAmdFftGetPlanScale_ +#define clAmdFftGetPlanTransposeResult clAmdFftGetPlanTransposeResult_ +#define clAmdFftGetResultLocation clAmdFftGetResultLocation_ +#define clAmdFftGetTmpBufSize clAmdFftGetTmpBufSize_ +#define clAmdFftGetVersion clAmdFftGetVersion_ +#define clAmdFftSetLayout clAmdFftSetLayout_ +#define clAmdFftSetPlanBatchSize clAmdFftSetPlanBatchSize_ +#define clAmdFftSetPlanDim clAmdFftSetPlanDim_ +#define clAmdFftSetPlanDistance clAmdFftSetPlanDistance_ +#define clAmdFftSetPlanInStride clAmdFftSetPlanInStride_ +#define clAmdFftSetPlanLength clAmdFftSetPlanLength_ +#define clAmdFftSetPlanOutStride clAmdFftSetPlanOutStride_ +#define clAmdFftSetPlanPrecision clAmdFftSetPlanPrecision_ +#define clAmdFftSetPlanScale clAmdFftSetPlanScale_ +#define clAmdFftSetPlanTransposeResult clAmdFftSetPlanTransposeResult_ +#define clAmdFftSetResultLocation clAmdFftSetResultLocation_ +#define clAmdFftSetup clAmdFftSetup_ +#define clAmdFftTeardown clAmdFftTeardown_ + +#include + +// generated by parser_clamdfft.py +#undef clAmdFftBakePlan +#define clAmdFftBakePlan clAmdFftBakePlan_pfn +#undef clAmdFftCopyPlan +//#define clAmdFftCopyPlan clAmdFftCopyPlan_pfn +#undef clAmdFftCreateDefaultPlan +#define clAmdFftCreateDefaultPlan clAmdFftCreateDefaultPlan_pfn +#undef clAmdFftDestroyPlan +#define clAmdFftDestroyPlan clAmdFftDestroyPlan_pfn +#undef clAmdFftEnqueueTransform +#define clAmdFftEnqueueTransform clAmdFftEnqueueTransform_pfn +#undef clAmdFftGetLayout +//#define clAmdFftGetLayout clAmdFftGetLayout_pfn +#undef clAmdFftGetPlanBatchSize +//#define clAmdFftGetPlanBatchSize clAmdFftGetPlanBatchSize_pfn +#undef clAmdFftGetPlanContext +//#define clAmdFftGetPlanContext clAmdFftGetPlanContext_pfn +#undef clAmdFftGetPlanDim +//#define clAmdFftGetPlanDim clAmdFftGetPlanDim_pfn +#undef clAmdFftGetPlanDistance +//#define clAmdFftGetPlanDistance clAmdFftGetPlanDistance_pfn +#undef clAmdFftGetPlanInStride +//#define clAmdFftGetPlanInStride clAmdFftGetPlanInStride_pfn +#undef clAmdFftGetPlanLength +//#define clAmdFftGetPlanLength clAmdFftGetPlanLength_pfn +#undef clAmdFftGetPlanOutStride +//#define clAmdFftGetPlanOutStride clAmdFftGetPlanOutStride_pfn +#undef clAmdFftGetPlanPrecision +//#define clAmdFftGetPlanPrecision clAmdFftGetPlanPrecision_pfn +#undef clAmdFftGetPlanScale +//#define clAmdFftGetPlanScale clAmdFftGetPlanScale_pfn +#undef clAmdFftGetPlanTransposeResult +//#define clAmdFftGetPlanTransposeResult clAmdFftGetPlanTransposeResult_pfn +#undef clAmdFftGetResultLocation +//#define clAmdFftGetResultLocation clAmdFftGetResultLocation_pfn +#undef clAmdFftGetTmpBufSize +#define clAmdFftGetTmpBufSize clAmdFftGetTmpBufSize_pfn +#undef clAmdFftGetVersion +#define clAmdFftGetVersion clAmdFftGetVersion_pfn +#undef clAmdFftSetLayout +#define clAmdFftSetLayout clAmdFftSetLayout_pfn +#undef clAmdFftSetPlanBatchSize +#define clAmdFftSetPlanBatchSize clAmdFftSetPlanBatchSize_pfn +#undef clAmdFftSetPlanDim +//#define clAmdFftSetPlanDim clAmdFftSetPlanDim_pfn +#undef clAmdFftSetPlanDistance +#define clAmdFftSetPlanDistance clAmdFftSetPlanDistance_pfn +#undef clAmdFftSetPlanInStride +#define clAmdFftSetPlanInStride clAmdFftSetPlanInStride_pfn +#undef clAmdFftSetPlanLength +//#define clAmdFftSetPlanLength clAmdFftSetPlanLength_pfn +#undef clAmdFftSetPlanOutStride +#define clAmdFftSetPlanOutStride clAmdFftSetPlanOutStride_pfn +#undef clAmdFftSetPlanPrecision +#define clAmdFftSetPlanPrecision clAmdFftSetPlanPrecision_pfn +#undef clAmdFftSetPlanScale +#define clAmdFftSetPlanScale clAmdFftSetPlanScale_pfn +#undef clAmdFftSetPlanTransposeResult +//#define clAmdFftSetPlanTransposeResult clAmdFftSetPlanTransposeResult_pfn +#undef clAmdFftSetResultLocation +#define clAmdFftSetResultLocation clAmdFftSetResultLocation_pfn +#undef clAmdFftSetup +#define clAmdFftSetup clAmdFftSetup_pfn +#undef clAmdFftTeardown +#define clAmdFftTeardown clAmdFftTeardown_pfn + +// generated by parser_clamdfft.py +extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftBakePlan)(clAmdFftPlanHandle plHandle, cl_uint numQueues, cl_command_queue* commQueueFFT, void (CL_CALLBACK* pfn_notify) (clAmdFftPlanHandle plHandle, void* user_data), void* user_data); +//extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftCopyPlan)(clAmdFftPlanHandle* out_plHandle, cl_context new_context, clAmdFftPlanHandle in_plHandle); +extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftCreateDefaultPlan)(clAmdFftPlanHandle* plHandle, cl_context context, const clAmdFftDim dim, const size_t* clLengths); +extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftDestroyPlan)(clAmdFftPlanHandle* plHandle); +extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftEnqueueTransform)(clAmdFftPlanHandle plHandle, clAmdFftDirection dir, cl_uint numQueuesAndEvents, cl_command_queue* commQueues, cl_uint numWaitEvents, const cl_event* waitEvents, cl_event* outEvents, cl_mem* inputBuffers, cl_mem* outputBuffers, cl_mem tmpBuffer); +//extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftGetLayout)(const clAmdFftPlanHandle plHandle, clAmdFftLayout* iLayout, clAmdFftLayout* oLayout); +//extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftGetPlanBatchSize)(const clAmdFftPlanHandle plHandle, size_t* batchSize); +//extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftGetPlanContext)(const clAmdFftPlanHandle plHandle, cl_context* context); +//extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftGetPlanDim)(const clAmdFftPlanHandle plHandle, clAmdFftDim* dim, cl_uint* size); +//extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftGetPlanDistance)(const clAmdFftPlanHandle plHandle, size_t* iDist, size_t* oDist); +//extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftGetPlanInStride)(const clAmdFftPlanHandle plHandle, const clAmdFftDim dim, size_t* clStrides); +//extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftGetPlanLength)(const clAmdFftPlanHandle plHandle, const clAmdFftDim dim, size_t* clLengths); +//extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftGetPlanOutStride)(const clAmdFftPlanHandle plHandle, const clAmdFftDim dim, size_t* clStrides); +//extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftGetPlanPrecision)(const clAmdFftPlanHandle plHandle, clAmdFftPrecision* precision); +//extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftGetPlanScale)(const clAmdFftPlanHandle plHandle, clAmdFftDirection dir, cl_float* scale); +//extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftGetPlanTransposeResult)(const clAmdFftPlanHandle plHandle, clAmdFftResultTransposed* transposed); +//extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftGetResultLocation)(const clAmdFftPlanHandle plHandle, clAmdFftResultLocation* placeness); +extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftGetTmpBufSize)(const clAmdFftPlanHandle plHandle, size_t* buffersize); +extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftGetVersion)(cl_uint* major, cl_uint* minor, cl_uint* patch); +extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftSetLayout)(clAmdFftPlanHandle plHandle, clAmdFftLayout iLayout, clAmdFftLayout oLayout); +extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftSetPlanBatchSize)(clAmdFftPlanHandle plHandle, size_t batchSize); +//extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftSetPlanDim)(clAmdFftPlanHandle plHandle, const clAmdFftDim dim); +extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftSetPlanDistance)(clAmdFftPlanHandle plHandle, size_t iDist, size_t oDist); +extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftSetPlanInStride)(clAmdFftPlanHandle plHandle, const clAmdFftDim dim, size_t* clStrides); +//extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftSetPlanLength)(clAmdFftPlanHandle plHandle, const clAmdFftDim dim, const size_t* clLengths); +extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftSetPlanOutStride)(clAmdFftPlanHandle plHandle, const clAmdFftDim dim, size_t* clStrides); +extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftSetPlanPrecision)(clAmdFftPlanHandle plHandle, clAmdFftPrecision precision); +extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftSetPlanScale)(clAmdFftPlanHandle plHandle, clAmdFftDirection dir, cl_float scale); +//extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftSetPlanTransposeResult)(clAmdFftPlanHandle plHandle, clAmdFftResultTransposed transposed); +extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftSetResultLocation)(clAmdFftPlanHandle plHandle, clAmdFftResultLocation placeness); +extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftSetup)(const clAmdFftSetupData* setupData); +extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftTeardown)(); diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/opencl/runtime/autogenerated/opencl_core.hpp b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/opencl/runtime/autogenerated/opencl_core.hpp new file mode 100644 index 0000000..28618a1 --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/opencl/runtime/autogenerated/opencl_core.hpp @@ -0,0 +1,371 @@ +// +// AUTOGENERATED, DO NOT EDIT +// +#ifndef OPENCV_CORE_OCL_RUNTIME_OPENCL_CORE_HPP +#error "Invalid usage" +#endif + +// generated by parser_cl.py +#define clBuildProgram clBuildProgram_ +#define clCompileProgram clCompileProgram_ +#define clCreateBuffer clCreateBuffer_ +#define clCreateCommandQueue clCreateCommandQueue_ +#define clCreateContext clCreateContext_ +#define clCreateContextFromType clCreateContextFromType_ +#define clCreateImage clCreateImage_ +#define clCreateImage2D clCreateImage2D_ +#define clCreateImage3D clCreateImage3D_ +#define clCreateKernel clCreateKernel_ +#define clCreateKernelsInProgram clCreateKernelsInProgram_ +#define clCreateProgramWithBinary clCreateProgramWithBinary_ +#define clCreateProgramWithBuiltInKernels clCreateProgramWithBuiltInKernels_ +#define clCreateProgramWithSource clCreateProgramWithSource_ +#define clCreateSampler clCreateSampler_ +#define clCreateSubBuffer clCreateSubBuffer_ +#define clCreateSubDevices clCreateSubDevices_ +#define clCreateUserEvent clCreateUserEvent_ +#define clEnqueueBarrier clEnqueueBarrier_ +#define clEnqueueBarrierWithWaitList clEnqueueBarrierWithWaitList_ +#define clEnqueueCopyBuffer clEnqueueCopyBuffer_ +#define clEnqueueCopyBufferRect clEnqueueCopyBufferRect_ +#define clEnqueueCopyBufferToImage clEnqueueCopyBufferToImage_ +#define clEnqueueCopyImage clEnqueueCopyImage_ +#define clEnqueueCopyImageToBuffer clEnqueueCopyImageToBuffer_ +#define clEnqueueFillBuffer clEnqueueFillBuffer_ +#define clEnqueueFillImage clEnqueueFillImage_ +#define clEnqueueMapBuffer clEnqueueMapBuffer_ +#define clEnqueueMapImage clEnqueueMapImage_ +#define clEnqueueMarker clEnqueueMarker_ +#define clEnqueueMarkerWithWaitList clEnqueueMarkerWithWaitList_ +#define clEnqueueMigrateMemObjects clEnqueueMigrateMemObjects_ +#define clEnqueueNDRangeKernel clEnqueueNDRangeKernel_ +#define clEnqueueNativeKernel clEnqueueNativeKernel_ +#define clEnqueueReadBuffer clEnqueueReadBuffer_ +#define clEnqueueReadBufferRect clEnqueueReadBufferRect_ +#define clEnqueueReadImage clEnqueueReadImage_ +#define clEnqueueTask clEnqueueTask_ +#define clEnqueueUnmapMemObject clEnqueueUnmapMemObject_ +#define clEnqueueWaitForEvents clEnqueueWaitForEvents_ +#define clEnqueueWriteBuffer clEnqueueWriteBuffer_ +#define clEnqueueWriteBufferRect clEnqueueWriteBufferRect_ +#define clEnqueueWriteImage clEnqueueWriteImage_ +#define clFinish clFinish_ +#define clFlush clFlush_ +#define clGetCommandQueueInfo clGetCommandQueueInfo_ +#define clGetContextInfo clGetContextInfo_ +#define clGetDeviceIDs clGetDeviceIDs_ +#define clGetDeviceInfo clGetDeviceInfo_ +#define clGetEventInfo clGetEventInfo_ +#define clGetEventProfilingInfo clGetEventProfilingInfo_ +#define clGetExtensionFunctionAddress clGetExtensionFunctionAddress_ +#define clGetExtensionFunctionAddressForPlatform clGetExtensionFunctionAddressForPlatform_ +#define clGetImageInfo clGetImageInfo_ +#define clGetKernelArgInfo clGetKernelArgInfo_ +#define clGetKernelInfo clGetKernelInfo_ +#define clGetKernelWorkGroupInfo clGetKernelWorkGroupInfo_ +#define clGetMemObjectInfo clGetMemObjectInfo_ +#define clGetPlatformIDs clGetPlatformIDs_ +#define clGetPlatformInfo clGetPlatformInfo_ +#define clGetProgramBuildInfo clGetProgramBuildInfo_ +#define clGetProgramInfo clGetProgramInfo_ +#define clGetSamplerInfo clGetSamplerInfo_ +#define clGetSupportedImageFormats clGetSupportedImageFormats_ +#define clLinkProgram clLinkProgram_ +#define clReleaseCommandQueue clReleaseCommandQueue_ +#define clReleaseContext clReleaseContext_ +#define clReleaseDevice clReleaseDevice_ +#define clReleaseEvent clReleaseEvent_ +#define clReleaseKernel clReleaseKernel_ +#define clReleaseMemObject clReleaseMemObject_ +#define clReleaseProgram clReleaseProgram_ +#define clReleaseSampler clReleaseSampler_ +#define clRetainCommandQueue clRetainCommandQueue_ +#define clRetainContext clRetainContext_ +#define clRetainDevice clRetainDevice_ +#define clRetainEvent clRetainEvent_ +#define clRetainKernel clRetainKernel_ +#define clRetainMemObject clRetainMemObject_ +#define clRetainProgram clRetainProgram_ +#define clRetainSampler clRetainSampler_ +#define clSetEventCallback clSetEventCallback_ +#define clSetKernelArg clSetKernelArg_ +#define clSetMemObjectDestructorCallback clSetMemObjectDestructorCallback_ +#define clSetUserEventStatus clSetUserEventStatus_ +#define clUnloadCompiler clUnloadCompiler_ +#define clUnloadPlatformCompiler clUnloadPlatformCompiler_ +#define clWaitForEvents clWaitForEvents_ + +#if defined __APPLE__ +#define CL_SILENCE_DEPRECATION +#include +#else +#include +#endif + +// generated by parser_cl.py +#undef clBuildProgram +#define clBuildProgram clBuildProgram_pfn +#undef clCompileProgram +#define clCompileProgram clCompileProgram_pfn +#undef clCreateBuffer +#define clCreateBuffer clCreateBuffer_pfn +#undef clCreateCommandQueue +#define clCreateCommandQueue clCreateCommandQueue_pfn +#undef clCreateContext +#define clCreateContext clCreateContext_pfn +#undef clCreateContextFromType +#define clCreateContextFromType clCreateContextFromType_pfn +#undef clCreateImage +#define clCreateImage clCreateImage_pfn +#undef clCreateImage2D +#define clCreateImage2D clCreateImage2D_pfn +#undef clCreateImage3D +#define clCreateImage3D clCreateImage3D_pfn +#undef clCreateKernel +#define clCreateKernel clCreateKernel_pfn +#undef clCreateKernelsInProgram +#define clCreateKernelsInProgram clCreateKernelsInProgram_pfn +#undef clCreateProgramWithBinary +#define clCreateProgramWithBinary clCreateProgramWithBinary_pfn +#undef clCreateProgramWithBuiltInKernels +#define clCreateProgramWithBuiltInKernels clCreateProgramWithBuiltInKernels_pfn +#undef clCreateProgramWithSource +#define clCreateProgramWithSource clCreateProgramWithSource_pfn +#undef clCreateSampler +#define clCreateSampler clCreateSampler_pfn +#undef clCreateSubBuffer +#define clCreateSubBuffer clCreateSubBuffer_pfn +#undef clCreateSubDevices +#define clCreateSubDevices clCreateSubDevices_pfn +#undef clCreateUserEvent +#define clCreateUserEvent clCreateUserEvent_pfn +#undef clEnqueueBarrier +#define clEnqueueBarrier clEnqueueBarrier_pfn +#undef clEnqueueBarrierWithWaitList +#define clEnqueueBarrierWithWaitList clEnqueueBarrierWithWaitList_pfn +#undef clEnqueueCopyBuffer +#define clEnqueueCopyBuffer clEnqueueCopyBuffer_pfn +#undef clEnqueueCopyBufferRect +#define clEnqueueCopyBufferRect clEnqueueCopyBufferRect_pfn +#undef clEnqueueCopyBufferToImage +#define clEnqueueCopyBufferToImage clEnqueueCopyBufferToImage_pfn +#undef clEnqueueCopyImage +#define clEnqueueCopyImage clEnqueueCopyImage_pfn +#undef clEnqueueCopyImageToBuffer +#define clEnqueueCopyImageToBuffer clEnqueueCopyImageToBuffer_pfn +#undef clEnqueueFillBuffer +#define clEnqueueFillBuffer clEnqueueFillBuffer_pfn +#undef clEnqueueFillImage +#define clEnqueueFillImage clEnqueueFillImage_pfn +#undef clEnqueueMapBuffer +#define clEnqueueMapBuffer clEnqueueMapBuffer_pfn +#undef clEnqueueMapImage +#define clEnqueueMapImage clEnqueueMapImage_pfn +#undef clEnqueueMarker +#define clEnqueueMarker clEnqueueMarker_pfn +#undef clEnqueueMarkerWithWaitList +#define clEnqueueMarkerWithWaitList clEnqueueMarkerWithWaitList_pfn +#undef clEnqueueMigrateMemObjects +#define clEnqueueMigrateMemObjects clEnqueueMigrateMemObjects_pfn +#undef clEnqueueNDRangeKernel +#define clEnqueueNDRangeKernel clEnqueueNDRangeKernel_pfn +#undef clEnqueueNativeKernel +#define clEnqueueNativeKernel clEnqueueNativeKernel_pfn +#undef clEnqueueReadBuffer +#define clEnqueueReadBuffer clEnqueueReadBuffer_pfn +#undef clEnqueueReadBufferRect +#define clEnqueueReadBufferRect clEnqueueReadBufferRect_pfn +#undef clEnqueueReadImage +#define clEnqueueReadImage clEnqueueReadImage_pfn +#undef clEnqueueTask +#define clEnqueueTask clEnqueueTask_pfn +#undef clEnqueueUnmapMemObject +#define clEnqueueUnmapMemObject clEnqueueUnmapMemObject_pfn +#undef clEnqueueWaitForEvents +#define clEnqueueWaitForEvents clEnqueueWaitForEvents_pfn +#undef clEnqueueWriteBuffer +#define clEnqueueWriteBuffer clEnqueueWriteBuffer_pfn +#undef clEnqueueWriteBufferRect +#define clEnqueueWriteBufferRect clEnqueueWriteBufferRect_pfn +#undef clEnqueueWriteImage +#define clEnqueueWriteImage clEnqueueWriteImage_pfn +#undef clFinish +#define clFinish clFinish_pfn +#undef clFlush +#define clFlush clFlush_pfn +#undef clGetCommandQueueInfo +#define clGetCommandQueueInfo clGetCommandQueueInfo_pfn +#undef clGetContextInfo +#define clGetContextInfo clGetContextInfo_pfn +#undef clGetDeviceIDs +#define clGetDeviceIDs clGetDeviceIDs_pfn +#undef clGetDeviceInfo +#define clGetDeviceInfo clGetDeviceInfo_pfn +#undef clGetEventInfo +#define clGetEventInfo clGetEventInfo_pfn +#undef clGetEventProfilingInfo +#define clGetEventProfilingInfo clGetEventProfilingInfo_pfn +#undef clGetExtensionFunctionAddress +#define clGetExtensionFunctionAddress clGetExtensionFunctionAddress_pfn +#undef clGetExtensionFunctionAddressForPlatform +#define clGetExtensionFunctionAddressForPlatform clGetExtensionFunctionAddressForPlatform_pfn +#undef clGetImageInfo +#define clGetImageInfo clGetImageInfo_pfn +#undef clGetKernelArgInfo +#define clGetKernelArgInfo clGetKernelArgInfo_pfn +#undef clGetKernelInfo +#define clGetKernelInfo clGetKernelInfo_pfn +#undef clGetKernelWorkGroupInfo +#define clGetKernelWorkGroupInfo clGetKernelWorkGroupInfo_pfn +#undef clGetMemObjectInfo +#define clGetMemObjectInfo clGetMemObjectInfo_pfn +#undef clGetPlatformIDs +#define clGetPlatformIDs clGetPlatformIDs_pfn +#undef clGetPlatformInfo +#define clGetPlatformInfo clGetPlatformInfo_pfn +#undef clGetProgramBuildInfo +#define clGetProgramBuildInfo clGetProgramBuildInfo_pfn +#undef clGetProgramInfo +#define clGetProgramInfo clGetProgramInfo_pfn +#undef clGetSamplerInfo +#define clGetSamplerInfo clGetSamplerInfo_pfn +#undef clGetSupportedImageFormats +#define clGetSupportedImageFormats clGetSupportedImageFormats_pfn +#undef clLinkProgram +#define clLinkProgram clLinkProgram_pfn +#undef clReleaseCommandQueue +#define clReleaseCommandQueue clReleaseCommandQueue_pfn +#undef clReleaseContext +#define clReleaseContext clReleaseContext_pfn +#undef clReleaseDevice +#define clReleaseDevice clReleaseDevice_pfn +#undef clReleaseEvent +#define clReleaseEvent clReleaseEvent_pfn +#undef clReleaseKernel +#define clReleaseKernel clReleaseKernel_pfn +#undef clReleaseMemObject +#define clReleaseMemObject clReleaseMemObject_pfn +#undef clReleaseProgram +#define clReleaseProgram clReleaseProgram_pfn +#undef clReleaseSampler +#define clReleaseSampler clReleaseSampler_pfn +#undef clRetainCommandQueue +#define clRetainCommandQueue clRetainCommandQueue_pfn +#undef clRetainContext +#define clRetainContext clRetainContext_pfn +#undef clRetainDevice +#define clRetainDevice clRetainDevice_pfn +#undef clRetainEvent +#define clRetainEvent clRetainEvent_pfn +#undef clRetainKernel +#define clRetainKernel clRetainKernel_pfn +#undef clRetainMemObject +#define clRetainMemObject clRetainMemObject_pfn +#undef clRetainProgram +#define clRetainProgram clRetainProgram_pfn +#undef clRetainSampler +#define clRetainSampler clRetainSampler_pfn +#undef clSetEventCallback +#define clSetEventCallback clSetEventCallback_pfn +#undef clSetKernelArg +#define clSetKernelArg clSetKernelArg_pfn +#undef clSetMemObjectDestructorCallback +#define clSetMemObjectDestructorCallback clSetMemObjectDestructorCallback_pfn +#undef clSetUserEventStatus +#define clSetUserEventStatus clSetUserEventStatus_pfn +#undef clUnloadCompiler +#define clUnloadCompiler clUnloadCompiler_pfn +#undef clUnloadPlatformCompiler +#define clUnloadPlatformCompiler clUnloadPlatformCompiler_pfn +#undef clWaitForEvents +#define clWaitForEvents clWaitForEvents_pfn + +// generated by parser_cl.py +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clBuildProgram)(cl_program, cl_uint, const cl_device_id*, const char*, void (CL_CALLBACK*) (cl_program, void*), void*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clCompileProgram)(cl_program, cl_uint, const cl_device_id*, const char*, cl_uint, const cl_program*, const char**, void (CL_CALLBACK*) (cl_program, void*), void*); +extern CL_RUNTIME_EXPORT cl_mem (CL_API_CALL*clCreateBuffer)(cl_context, cl_mem_flags, size_t, void*, cl_int*); +extern CL_RUNTIME_EXPORT cl_command_queue (CL_API_CALL*clCreateCommandQueue)(cl_context, cl_device_id, cl_command_queue_properties, cl_int*); +extern CL_RUNTIME_EXPORT cl_context (CL_API_CALL*clCreateContext)(const cl_context_properties*, cl_uint, const cl_device_id*, void (CL_CALLBACK*) (const char*, const void*, size_t, void*), void*, cl_int*); +extern CL_RUNTIME_EXPORT cl_context (CL_API_CALL*clCreateContextFromType)(const cl_context_properties*, cl_device_type, void (CL_CALLBACK*) (const char*, const void*, size_t, void*), void*, cl_int*); +extern CL_RUNTIME_EXPORT cl_mem (CL_API_CALL*clCreateImage)(cl_context, cl_mem_flags, const cl_image_format*, const cl_image_desc*, void*, cl_int*); +extern CL_RUNTIME_EXPORT cl_mem (CL_API_CALL*clCreateImage2D)(cl_context, cl_mem_flags, const cl_image_format*, size_t, size_t, size_t, void*, cl_int*); +extern CL_RUNTIME_EXPORT cl_mem (CL_API_CALL*clCreateImage3D)(cl_context, cl_mem_flags, const cl_image_format*, size_t, size_t, size_t, size_t, size_t, void*, cl_int*); +extern CL_RUNTIME_EXPORT cl_kernel (CL_API_CALL*clCreateKernel)(cl_program, const char*, cl_int*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clCreateKernelsInProgram)(cl_program, cl_uint, cl_kernel*, cl_uint*); +extern CL_RUNTIME_EXPORT cl_program (CL_API_CALL*clCreateProgramWithBinary)(cl_context, cl_uint, const cl_device_id*, const size_t*, const unsigned char**, cl_int*, cl_int*); +extern CL_RUNTIME_EXPORT cl_program (CL_API_CALL*clCreateProgramWithBuiltInKernels)(cl_context, cl_uint, const cl_device_id*, const char*, cl_int*); +extern CL_RUNTIME_EXPORT cl_program (CL_API_CALL*clCreateProgramWithSource)(cl_context, cl_uint, const char**, const size_t*, cl_int*); +extern CL_RUNTIME_EXPORT cl_sampler (CL_API_CALL*clCreateSampler)(cl_context, cl_bool, cl_addressing_mode, cl_filter_mode, cl_int*); +extern CL_RUNTIME_EXPORT cl_mem (CL_API_CALL*clCreateSubBuffer)(cl_mem, cl_mem_flags, cl_buffer_create_type, const void*, cl_int*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clCreateSubDevices)(cl_device_id, const cl_device_partition_property*, cl_uint, cl_device_id*, cl_uint*); +extern CL_RUNTIME_EXPORT cl_event (CL_API_CALL*clCreateUserEvent)(cl_context, cl_int*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueBarrier)(cl_command_queue); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueBarrierWithWaitList)(cl_command_queue, cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueCopyBuffer)(cl_command_queue, cl_mem, cl_mem, size_t, size_t, size_t, cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueCopyBufferRect)(cl_command_queue, cl_mem, cl_mem, const size_t*, const size_t*, const size_t*, size_t, size_t, size_t, size_t, cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueCopyBufferToImage)(cl_command_queue, cl_mem, cl_mem, size_t, const size_t*, const size_t*, cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueCopyImage)(cl_command_queue, cl_mem, cl_mem, const size_t*, const size_t*, const size_t*, cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueCopyImageToBuffer)(cl_command_queue, cl_mem, cl_mem, const size_t*, const size_t*, size_t, cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueFillBuffer)(cl_command_queue, cl_mem, const void*, size_t, size_t, size_t, cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueFillImage)(cl_command_queue, cl_mem, const void*, const size_t*, const size_t*, cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT void* (CL_API_CALL*clEnqueueMapBuffer)(cl_command_queue, cl_mem, cl_bool, cl_map_flags, size_t, size_t, cl_uint, const cl_event*, cl_event*, cl_int*); +extern CL_RUNTIME_EXPORT void* (CL_API_CALL*clEnqueueMapImage)(cl_command_queue, cl_mem, cl_bool, cl_map_flags, const size_t*, const size_t*, size_t*, size_t*, cl_uint, const cl_event*, cl_event*, cl_int*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueMarker)(cl_command_queue, cl_event*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueMarkerWithWaitList)(cl_command_queue, cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueMigrateMemObjects)(cl_command_queue, cl_uint, const cl_mem*, cl_mem_migration_flags, cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueNDRangeKernel)(cl_command_queue, cl_kernel, cl_uint, const size_t*, const size_t*, const size_t*, cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueNativeKernel)(cl_command_queue, void (CL_CALLBACK*) (void*), void*, size_t, cl_uint, const cl_mem*, const void**, cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueReadBuffer)(cl_command_queue, cl_mem, cl_bool, size_t, size_t, void*, cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueReadBufferRect)(cl_command_queue, cl_mem, cl_bool, const size_t*, const size_t*, const size_t*, size_t, size_t, size_t, size_t, void*, cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueReadImage)(cl_command_queue, cl_mem, cl_bool, const size_t*, const size_t*, size_t, size_t, void*, cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueTask)(cl_command_queue, cl_kernel, cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueUnmapMemObject)(cl_command_queue, cl_mem, void*, cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueWaitForEvents)(cl_command_queue, cl_uint, const cl_event*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueWriteBuffer)(cl_command_queue, cl_mem, cl_bool, size_t, size_t, const void*, cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueWriteBufferRect)(cl_command_queue, cl_mem, cl_bool, const size_t*, const size_t*, const size_t*, size_t, size_t, size_t, size_t, const void*, cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueWriteImage)(cl_command_queue, cl_mem, cl_bool, const size_t*, const size_t*, size_t, size_t, const void*, cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clFinish)(cl_command_queue); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clFlush)(cl_command_queue); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetCommandQueueInfo)(cl_command_queue, cl_command_queue_info, size_t, void*, size_t*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetContextInfo)(cl_context, cl_context_info, size_t, void*, size_t*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetDeviceIDs)(cl_platform_id, cl_device_type, cl_uint, cl_device_id*, cl_uint*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetDeviceInfo)(cl_device_id, cl_device_info, size_t, void*, size_t*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetEventInfo)(cl_event, cl_event_info, size_t, void*, size_t*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetEventProfilingInfo)(cl_event, cl_profiling_info, size_t, void*, size_t*); +extern CL_RUNTIME_EXPORT void* (CL_API_CALL*clGetExtensionFunctionAddress)(const char*); +extern CL_RUNTIME_EXPORT void* (CL_API_CALL*clGetExtensionFunctionAddressForPlatform)(cl_platform_id, const char*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetImageInfo)(cl_mem, cl_image_info, size_t, void*, size_t*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetKernelArgInfo)(cl_kernel, cl_uint, cl_kernel_arg_info, size_t, void*, size_t*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetKernelInfo)(cl_kernel, cl_kernel_info, size_t, void*, size_t*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetKernelWorkGroupInfo)(cl_kernel, cl_device_id, cl_kernel_work_group_info, size_t, void*, size_t*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetMemObjectInfo)(cl_mem, cl_mem_info, size_t, void*, size_t*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetPlatformIDs)(cl_uint, cl_platform_id*, cl_uint*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetPlatformInfo)(cl_platform_id, cl_platform_info, size_t, void*, size_t*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetProgramBuildInfo)(cl_program, cl_device_id, cl_program_build_info, size_t, void*, size_t*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetProgramInfo)(cl_program, cl_program_info, size_t, void*, size_t*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetSamplerInfo)(cl_sampler, cl_sampler_info, size_t, void*, size_t*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetSupportedImageFormats)(cl_context, cl_mem_flags, cl_mem_object_type, cl_uint, cl_image_format*, cl_uint*); +extern CL_RUNTIME_EXPORT cl_program (CL_API_CALL*clLinkProgram)(cl_context, cl_uint, const cl_device_id*, const char*, cl_uint, const cl_program*, void (CL_CALLBACK*) (cl_program, void*), void*, cl_int*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clReleaseCommandQueue)(cl_command_queue); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clReleaseContext)(cl_context); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clReleaseDevice)(cl_device_id); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clReleaseEvent)(cl_event); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clReleaseKernel)(cl_kernel); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clReleaseMemObject)(cl_mem); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clReleaseProgram)(cl_program); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clReleaseSampler)(cl_sampler); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clRetainCommandQueue)(cl_command_queue); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clRetainContext)(cl_context); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clRetainDevice)(cl_device_id); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clRetainEvent)(cl_event); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clRetainKernel)(cl_kernel); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clRetainMemObject)(cl_mem); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clRetainProgram)(cl_program); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clRetainSampler)(cl_sampler); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clSetEventCallback)(cl_event, cl_int, void (CL_CALLBACK*) (cl_event, cl_int, void*), void*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clSetKernelArg)(cl_kernel, cl_uint, size_t, const void*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clSetMemObjectDestructorCallback)(cl_mem, void (CL_CALLBACK*) (cl_mem, void*), void*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clSetUserEventStatus)(cl_event, cl_int); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clUnloadCompiler)(); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clUnloadPlatformCompiler)(cl_platform_id); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clWaitForEvents)(cl_uint, const cl_event*); diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/opencl/runtime/autogenerated/opencl_core_wrappers.hpp b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/opencl/runtime/autogenerated/opencl_core_wrappers.hpp new file mode 100644 index 0000000..216b22b --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/opencl/runtime/autogenerated/opencl_core_wrappers.hpp @@ -0,0 +1,272 @@ +// +// AUTOGENERATED, DO NOT EDIT +// +#ifndef OPENCV_CORE_OCL_RUNTIME_OPENCL_WRAPPERS_HPP +#error "Invalid usage" +#endif + +// generated by parser_cl.py +#undef clBuildProgram +#define clBuildProgram clBuildProgram_fn +inline cl_int clBuildProgram(cl_program p0, cl_uint p1, const cl_device_id* p2, const char* p3, void (CL_CALLBACK*p4) (cl_program, void*), void* p5) { return clBuildProgram_pfn(p0, p1, p2, p3, p4, p5); } +#undef clCompileProgram +#define clCompileProgram clCompileProgram_fn +inline cl_int clCompileProgram(cl_program p0, cl_uint p1, const cl_device_id* p2, const char* p3, cl_uint p4, const cl_program* p5, const char** p6, void (CL_CALLBACK*p7) (cl_program, void*), void* p8) { return clCompileProgram_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8); } +#undef clCreateBuffer +#define clCreateBuffer clCreateBuffer_fn +inline cl_mem clCreateBuffer(cl_context p0, cl_mem_flags p1, size_t p2, void* p3, cl_int* p4) { return clCreateBuffer_pfn(p0, p1, p2, p3, p4); } +#undef clCreateCommandQueue +#define clCreateCommandQueue clCreateCommandQueue_fn +inline cl_command_queue clCreateCommandQueue(cl_context p0, cl_device_id p1, cl_command_queue_properties p2, cl_int* p3) { return clCreateCommandQueue_pfn(p0, p1, p2, p3); } +#undef clCreateContext +#define clCreateContext clCreateContext_fn +inline cl_context clCreateContext(const cl_context_properties* p0, cl_uint p1, const cl_device_id* p2, void (CL_CALLBACK*p3) (const char*, const void*, size_t, void*), void* p4, cl_int* p5) { return clCreateContext_pfn(p0, p1, p2, p3, p4, p5); } +#undef clCreateContextFromType +#define clCreateContextFromType clCreateContextFromType_fn +inline cl_context clCreateContextFromType(const cl_context_properties* p0, cl_device_type p1, void (CL_CALLBACK*p2) (const char*, const void*, size_t, void*), void* p3, cl_int* p4) { return clCreateContextFromType_pfn(p0, p1, p2, p3, p4); } +#undef clCreateImage +#define clCreateImage clCreateImage_fn +inline cl_mem clCreateImage(cl_context p0, cl_mem_flags p1, const cl_image_format* p2, const cl_image_desc* p3, void* p4, cl_int* p5) { return clCreateImage_pfn(p0, p1, p2, p3, p4, p5); } +#undef clCreateImage2D +#define clCreateImage2D clCreateImage2D_fn +inline cl_mem clCreateImage2D(cl_context p0, cl_mem_flags p1, const cl_image_format* p2, size_t p3, size_t p4, size_t p5, void* p6, cl_int* p7) { return clCreateImage2D_pfn(p0, p1, p2, p3, p4, p5, p6, p7); } +#undef clCreateImage3D +#define clCreateImage3D clCreateImage3D_fn +inline cl_mem clCreateImage3D(cl_context p0, cl_mem_flags p1, const cl_image_format* p2, size_t p3, size_t p4, size_t p5, size_t p6, size_t p7, void* p8, cl_int* p9) { return clCreateImage3D_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9); } +#undef clCreateKernel +#define clCreateKernel clCreateKernel_fn +inline cl_kernel clCreateKernel(cl_program p0, const char* p1, cl_int* p2) { return clCreateKernel_pfn(p0, p1, p2); } +#undef clCreateKernelsInProgram +#define clCreateKernelsInProgram clCreateKernelsInProgram_fn +inline cl_int clCreateKernelsInProgram(cl_program p0, cl_uint p1, cl_kernel* p2, cl_uint* p3) { return clCreateKernelsInProgram_pfn(p0, p1, p2, p3); } +#undef clCreateProgramWithBinary +#define clCreateProgramWithBinary clCreateProgramWithBinary_fn +inline cl_program clCreateProgramWithBinary(cl_context p0, cl_uint p1, const cl_device_id* p2, const size_t* p3, const unsigned char** p4, cl_int* p5, cl_int* p6) { return clCreateProgramWithBinary_pfn(p0, p1, p2, p3, p4, p5, p6); } +#undef clCreateProgramWithBuiltInKernels +#define clCreateProgramWithBuiltInKernels clCreateProgramWithBuiltInKernels_fn +inline cl_program clCreateProgramWithBuiltInKernels(cl_context p0, cl_uint p1, const cl_device_id* p2, const char* p3, cl_int* p4) { return clCreateProgramWithBuiltInKernels_pfn(p0, p1, p2, p3, p4); } +#undef clCreateProgramWithSource +#define clCreateProgramWithSource clCreateProgramWithSource_fn +inline cl_program clCreateProgramWithSource(cl_context p0, cl_uint p1, const char** p2, const size_t* p3, cl_int* p4) { return clCreateProgramWithSource_pfn(p0, p1, p2, p3, p4); } +#undef clCreateSampler +#define clCreateSampler clCreateSampler_fn +inline cl_sampler clCreateSampler(cl_context p0, cl_bool p1, cl_addressing_mode p2, cl_filter_mode p3, cl_int* p4) { return clCreateSampler_pfn(p0, p1, p2, p3, p4); } +#undef clCreateSubBuffer +#define clCreateSubBuffer clCreateSubBuffer_fn +inline cl_mem clCreateSubBuffer(cl_mem p0, cl_mem_flags p1, cl_buffer_create_type p2, const void* p3, cl_int* p4) { return clCreateSubBuffer_pfn(p0, p1, p2, p3, p4); } +#undef clCreateSubDevices +#define clCreateSubDevices clCreateSubDevices_fn +inline cl_int clCreateSubDevices(cl_device_id p0, const cl_device_partition_property* p1, cl_uint p2, cl_device_id* p3, cl_uint* p4) { return clCreateSubDevices_pfn(p0, p1, p2, p3, p4); } +#undef clCreateUserEvent +#define clCreateUserEvent clCreateUserEvent_fn +inline cl_event clCreateUserEvent(cl_context p0, cl_int* p1) { return clCreateUserEvent_pfn(p0, p1); } +#undef clEnqueueBarrier +#define clEnqueueBarrier clEnqueueBarrier_fn +inline cl_int clEnqueueBarrier(cl_command_queue p0) { return clEnqueueBarrier_pfn(p0); } +#undef clEnqueueBarrierWithWaitList +#define clEnqueueBarrierWithWaitList clEnqueueBarrierWithWaitList_fn +inline cl_int clEnqueueBarrierWithWaitList(cl_command_queue p0, cl_uint p1, const cl_event* p2, cl_event* p3) { return clEnqueueBarrierWithWaitList_pfn(p0, p1, p2, p3); } +#undef clEnqueueCopyBuffer +#define clEnqueueCopyBuffer clEnqueueCopyBuffer_fn +inline cl_int clEnqueueCopyBuffer(cl_command_queue p0, cl_mem p1, cl_mem p2, size_t p3, size_t p4, size_t p5, cl_uint p6, const cl_event* p7, cl_event* p8) { return clEnqueueCopyBuffer_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8); } +#undef clEnqueueCopyBufferRect +#define clEnqueueCopyBufferRect clEnqueueCopyBufferRect_fn +inline cl_int clEnqueueCopyBufferRect(cl_command_queue p0, cl_mem p1, cl_mem p2, const size_t* p3, const size_t* p4, const size_t* p5, size_t p6, size_t p7, size_t p8, size_t p9, cl_uint p10, const cl_event* p11, cl_event* p12) { return clEnqueueCopyBufferRect_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12); } +#undef clEnqueueCopyBufferToImage +#define clEnqueueCopyBufferToImage clEnqueueCopyBufferToImage_fn +inline cl_int clEnqueueCopyBufferToImage(cl_command_queue p0, cl_mem p1, cl_mem p2, size_t p3, const size_t* p4, const size_t* p5, cl_uint p6, const cl_event* p7, cl_event* p8) { return clEnqueueCopyBufferToImage_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8); } +#undef clEnqueueCopyImage +#define clEnqueueCopyImage clEnqueueCopyImage_fn +inline cl_int clEnqueueCopyImage(cl_command_queue p0, cl_mem p1, cl_mem p2, const size_t* p3, const size_t* p4, const size_t* p5, cl_uint p6, const cl_event* p7, cl_event* p8) { return clEnqueueCopyImage_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8); } +#undef clEnqueueCopyImageToBuffer +#define clEnqueueCopyImageToBuffer clEnqueueCopyImageToBuffer_fn +inline cl_int clEnqueueCopyImageToBuffer(cl_command_queue p0, cl_mem p1, cl_mem p2, const size_t* p3, const size_t* p4, size_t p5, cl_uint p6, const cl_event* p7, cl_event* p8) { return clEnqueueCopyImageToBuffer_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8); } +#undef clEnqueueFillBuffer +#define clEnqueueFillBuffer clEnqueueFillBuffer_fn +inline cl_int clEnqueueFillBuffer(cl_command_queue p0, cl_mem p1, const void* p2, size_t p3, size_t p4, size_t p5, cl_uint p6, const cl_event* p7, cl_event* p8) { return clEnqueueFillBuffer_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8); } +#undef clEnqueueFillImage +#define clEnqueueFillImage clEnqueueFillImage_fn +inline cl_int clEnqueueFillImage(cl_command_queue p0, cl_mem p1, const void* p2, const size_t* p3, const size_t* p4, cl_uint p5, const cl_event* p6, cl_event* p7) { return clEnqueueFillImage_pfn(p0, p1, p2, p3, p4, p5, p6, p7); } +#undef clEnqueueMapBuffer +#define clEnqueueMapBuffer clEnqueueMapBuffer_fn +inline void* clEnqueueMapBuffer(cl_command_queue p0, cl_mem p1, cl_bool p2, cl_map_flags p3, size_t p4, size_t p5, cl_uint p6, const cl_event* p7, cl_event* p8, cl_int* p9) { return clEnqueueMapBuffer_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9); } +#undef clEnqueueMapImage +#define clEnqueueMapImage clEnqueueMapImage_fn +inline void* clEnqueueMapImage(cl_command_queue p0, cl_mem p1, cl_bool p2, cl_map_flags p3, const size_t* p4, const size_t* p5, size_t* p6, size_t* p7, cl_uint p8, const cl_event* p9, cl_event* p10, cl_int* p11) { return clEnqueueMapImage_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11); } +#undef clEnqueueMarker +#define clEnqueueMarker clEnqueueMarker_fn +inline cl_int clEnqueueMarker(cl_command_queue p0, cl_event* p1) { return clEnqueueMarker_pfn(p0, p1); } +#undef clEnqueueMarkerWithWaitList +#define clEnqueueMarkerWithWaitList clEnqueueMarkerWithWaitList_fn +inline cl_int clEnqueueMarkerWithWaitList(cl_command_queue p0, cl_uint p1, const cl_event* p2, cl_event* p3) { return clEnqueueMarkerWithWaitList_pfn(p0, p1, p2, p3); } +#undef clEnqueueMigrateMemObjects +#define clEnqueueMigrateMemObjects clEnqueueMigrateMemObjects_fn +inline cl_int clEnqueueMigrateMemObjects(cl_command_queue p0, cl_uint p1, const cl_mem* p2, cl_mem_migration_flags p3, cl_uint p4, const cl_event* p5, cl_event* p6) { return clEnqueueMigrateMemObjects_pfn(p0, p1, p2, p3, p4, p5, p6); } +#undef clEnqueueNDRangeKernel +#define clEnqueueNDRangeKernel clEnqueueNDRangeKernel_fn +inline cl_int clEnqueueNDRangeKernel(cl_command_queue p0, cl_kernel p1, cl_uint p2, const size_t* p3, const size_t* p4, const size_t* p5, cl_uint p6, const cl_event* p7, cl_event* p8) { return clEnqueueNDRangeKernel_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8); } +#undef clEnqueueNativeKernel +#define clEnqueueNativeKernel clEnqueueNativeKernel_fn +inline cl_int clEnqueueNativeKernel(cl_command_queue p0, void (CL_CALLBACK*p1) (void*), void* p2, size_t p3, cl_uint p4, const cl_mem* p5, const void** p6, cl_uint p7, const cl_event* p8, cl_event* p9) { return clEnqueueNativeKernel_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9); } +#undef clEnqueueReadBuffer +#define clEnqueueReadBuffer clEnqueueReadBuffer_fn +inline cl_int clEnqueueReadBuffer(cl_command_queue p0, cl_mem p1, cl_bool p2, size_t p3, size_t p4, void* p5, cl_uint p6, const cl_event* p7, cl_event* p8) { return clEnqueueReadBuffer_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8); } +#undef clEnqueueReadBufferRect +#define clEnqueueReadBufferRect clEnqueueReadBufferRect_fn +inline cl_int clEnqueueReadBufferRect(cl_command_queue p0, cl_mem p1, cl_bool p2, const size_t* p3, const size_t* p4, const size_t* p5, size_t p6, size_t p7, size_t p8, size_t p9, void* p10, cl_uint p11, const cl_event* p12, cl_event* p13) { return clEnqueueReadBufferRect_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13); } +#undef clEnqueueReadImage +#define clEnqueueReadImage clEnqueueReadImage_fn +inline cl_int clEnqueueReadImage(cl_command_queue p0, cl_mem p1, cl_bool p2, const size_t* p3, const size_t* p4, size_t p5, size_t p6, void* p7, cl_uint p8, const cl_event* p9, cl_event* p10) { return clEnqueueReadImage_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10); } +#undef clEnqueueTask +#define clEnqueueTask clEnqueueTask_fn +inline cl_int clEnqueueTask(cl_command_queue p0, cl_kernel p1, cl_uint p2, const cl_event* p3, cl_event* p4) { return clEnqueueTask_pfn(p0, p1, p2, p3, p4); } +#undef clEnqueueUnmapMemObject +#define clEnqueueUnmapMemObject clEnqueueUnmapMemObject_fn +inline cl_int clEnqueueUnmapMemObject(cl_command_queue p0, cl_mem p1, void* p2, cl_uint p3, const cl_event* p4, cl_event* p5) { return clEnqueueUnmapMemObject_pfn(p0, p1, p2, p3, p4, p5); } +#undef clEnqueueWaitForEvents +#define clEnqueueWaitForEvents clEnqueueWaitForEvents_fn +inline cl_int clEnqueueWaitForEvents(cl_command_queue p0, cl_uint p1, const cl_event* p2) { return clEnqueueWaitForEvents_pfn(p0, p1, p2); } +#undef clEnqueueWriteBuffer +#define clEnqueueWriteBuffer clEnqueueWriteBuffer_fn +inline cl_int clEnqueueWriteBuffer(cl_command_queue p0, cl_mem p1, cl_bool p2, size_t p3, size_t p4, const void* p5, cl_uint p6, const cl_event* p7, cl_event* p8) { return clEnqueueWriteBuffer_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8); } +#undef clEnqueueWriteBufferRect +#define clEnqueueWriteBufferRect clEnqueueWriteBufferRect_fn +inline cl_int clEnqueueWriteBufferRect(cl_command_queue p0, cl_mem p1, cl_bool p2, const size_t* p3, const size_t* p4, const size_t* p5, size_t p6, size_t p7, size_t p8, size_t p9, const void* p10, cl_uint p11, const cl_event* p12, cl_event* p13) { return clEnqueueWriteBufferRect_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13); } +#undef clEnqueueWriteImage +#define clEnqueueWriteImage clEnqueueWriteImage_fn +inline cl_int clEnqueueWriteImage(cl_command_queue p0, cl_mem p1, cl_bool p2, const size_t* p3, const size_t* p4, size_t p5, size_t p6, const void* p7, cl_uint p8, const cl_event* p9, cl_event* p10) { return clEnqueueWriteImage_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10); } +#undef clFinish +#define clFinish clFinish_fn +inline cl_int clFinish(cl_command_queue p0) { return clFinish_pfn(p0); } +#undef clFlush +#define clFlush clFlush_fn +inline cl_int clFlush(cl_command_queue p0) { return clFlush_pfn(p0); } +#undef clGetCommandQueueInfo +#define clGetCommandQueueInfo clGetCommandQueueInfo_fn +inline cl_int clGetCommandQueueInfo(cl_command_queue p0, cl_command_queue_info p1, size_t p2, void* p3, size_t* p4) { return clGetCommandQueueInfo_pfn(p0, p1, p2, p3, p4); } +#undef clGetContextInfo +#define clGetContextInfo clGetContextInfo_fn +inline cl_int clGetContextInfo(cl_context p0, cl_context_info p1, size_t p2, void* p3, size_t* p4) { return clGetContextInfo_pfn(p0, p1, p2, p3, p4); } +#undef clGetDeviceIDs +#define clGetDeviceIDs clGetDeviceIDs_fn +inline cl_int clGetDeviceIDs(cl_platform_id p0, cl_device_type p1, cl_uint p2, cl_device_id* p3, cl_uint* p4) { return clGetDeviceIDs_pfn(p0, p1, p2, p3, p4); } +#undef clGetDeviceInfo +#define clGetDeviceInfo clGetDeviceInfo_fn +inline cl_int clGetDeviceInfo(cl_device_id p0, cl_device_info p1, size_t p2, void* p3, size_t* p4) { return clGetDeviceInfo_pfn(p0, p1, p2, p3, p4); } +#undef clGetEventInfo +#define clGetEventInfo clGetEventInfo_fn +inline cl_int clGetEventInfo(cl_event p0, cl_event_info p1, size_t p2, void* p3, size_t* p4) { return clGetEventInfo_pfn(p0, p1, p2, p3, p4); } +#undef clGetEventProfilingInfo +#define clGetEventProfilingInfo clGetEventProfilingInfo_fn +inline cl_int clGetEventProfilingInfo(cl_event p0, cl_profiling_info p1, size_t p2, void* p3, size_t* p4) { return clGetEventProfilingInfo_pfn(p0, p1, p2, p3, p4); } +#undef clGetExtensionFunctionAddress +#define clGetExtensionFunctionAddress clGetExtensionFunctionAddress_fn +inline void* clGetExtensionFunctionAddress(const char* p0) { return clGetExtensionFunctionAddress_pfn(p0); } +#undef clGetExtensionFunctionAddressForPlatform +#define clGetExtensionFunctionAddressForPlatform clGetExtensionFunctionAddressForPlatform_fn +inline void* clGetExtensionFunctionAddressForPlatform(cl_platform_id p0, const char* p1) { return clGetExtensionFunctionAddressForPlatform_pfn(p0, p1); } +#undef clGetImageInfo +#define clGetImageInfo clGetImageInfo_fn +inline cl_int clGetImageInfo(cl_mem p0, cl_image_info p1, size_t p2, void* p3, size_t* p4) { return clGetImageInfo_pfn(p0, p1, p2, p3, p4); } +#undef clGetKernelArgInfo +#define clGetKernelArgInfo clGetKernelArgInfo_fn +inline cl_int clGetKernelArgInfo(cl_kernel p0, cl_uint p1, cl_kernel_arg_info p2, size_t p3, void* p4, size_t* p5) { return clGetKernelArgInfo_pfn(p0, p1, p2, p3, p4, p5); } +#undef clGetKernelInfo +#define clGetKernelInfo clGetKernelInfo_fn +inline cl_int clGetKernelInfo(cl_kernel p0, cl_kernel_info p1, size_t p2, void* p3, size_t* p4) { return clGetKernelInfo_pfn(p0, p1, p2, p3, p4); } +#undef clGetKernelWorkGroupInfo +#define clGetKernelWorkGroupInfo clGetKernelWorkGroupInfo_fn +inline cl_int clGetKernelWorkGroupInfo(cl_kernel p0, cl_device_id p1, cl_kernel_work_group_info p2, size_t p3, void* p4, size_t* p5) { return clGetKernelWorkGroupInfo_pfn(p0, p1, p2, p3, p4, p5); } +#undef clGetMemObjectInfo +#define clGetMemObjectInfo clGetMemObjectInfo_fn +inline cl_int clGetMemObjectInfo(cl_mem p0, cl_mem_info p1, size_t p2, void* p3, size_t* p4) { return clGetMemObjectInfo_pfn(p0, p1, p2, p3, p4); } +#undef clGetPlatformIDs +#define clGetPlatformIDs clGetPlatformIDs_fn +inline cl_int clGetPlatformIDs(cl_uint p0, cl_platform_id* p1, cl_uint* p2) { return clGetPlatformIDs_pfn(p0, p1, p2); } +#undef clGetPlatformInfo +#define clGetPlatformInfo clGetPlatformInfo_fn +inline cl_int clGetPlatformInfo(cl_platform_id p0, cl_platform_info p1, size_t p2, void* p3, size_t* p4) { return clGetPlatformInfo_pfn(p0, p1, p2, p3, p4); } +#undef clGetProgramBuildInfo +#define clGetProgramBuildInfo clGetProgramBuildInfo_fn +inline cl_int clGetProgramBuildInfo(cl_program p0, cl_device_id p1, cl_program_build_info p2, size_t p3, void* p4, size_t* p5) { return clGetProgramBuildInfo_pfn(p0, p1, p2, p3, p4, p5); } +#undef clGetProgramInfo +#define clGetProgramInfo clGetProgramInfo_fn +inline cl_int clGetProgramInfo(cl_program p0, cl_program_info p1, size_t p2, void* p3, size_t* p4) { return clGetProgramInfo_pfn(p0, p1, p2, p3, p4); } +#undef clGetSamplerInfo +#define clGetSamplerInfo clGetSamplerInfo_fn +inline cl_int clGetSamplerInfo(cl_sampler p0, cl_sampler_info p1, size_t p2, void* p3, size_t* p4) { return clGetSamplerInfo_pfn(p0, p1, p2, p3, p4); } +#undef clGetSupportedImageFormats +#define clGetSupportedImageFormats clGetSupportedImageFormats_fn +inline cl_int clGetSupportedImageFormats(cl_context p0, cl_mem_flags p1, cl_mem_object_type p2, cl_uint p3, cl_image_format* p4, cl_uint* p5) { return clGetSupportedImageFormats_pfn(p0, p1, p2, p3, p4, p5); } +#undef clLinkProgram +#define clLinkProgram clLinkProgram_fn +inline cl_program clLinkProgram(cl_context p0, cl_uint p1, const cl_device_id* p2, const char* p3, cl_uint p4, const cl_program* p5, void (CL_CALLBACK*p6) (cl_program, void*), void* p7, cl_int* p8) { return clLinkProgram_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8); } +#undef clReleaseCommandQueue +#define clReleaseCommandQueue clReleaseCommandQueue_fn +inline cl_int clReleaseCommandQueue(cl_command_queue p0) { return clReleaseCommandQueue_pfn(p0); } +#undef clReleaseContext +#define clReleaseContext clReleaseContext_fn +inline cl_int clReleaseContext(cl_context p0) { return clReleaseContext_pfn(p0); } +#undef clReleaseDevice +#define clReleaseDevice clReleaseDevice_fn +inline cl_int clReleaseDevice(cl_device_id p0) { return clReleaseDevice_pfn(p0); } +#undef clReleaseEvent +#define clReleaseEvent clReleaseEvent_fn +inline cl_int clReleaseEvent(cl_event p0) { return clReleaseEvent_pfn(p0); } +#undef clReleaseKernel +#define clReleaseKernel clReleaseKernel_fn +inline cl_int clReleaseKernel(cl_kernel p0) { return clReleaseKernel_pfn(p0); } +#undef clReleaseMemObject +#define clReleaseMemObject clReleaseMemObject_fn +inline cl_int clReleaseMemObject(cl_mem p0) { return clReleaseMemObject_pfn(p0); } +#undef clReleaseProgram +#define clReleaseProgram clReleaseProgram_fn +inline cl_int clReleaseProgram(cl_program p0) { return clReleaseProgram_pfn(p0); } +#undef clReleaseSampler +#define clReleaseSampler clReleaseSampler_fn +inline cl_int clReleaseSampler(cl_sampler p0) { return clReleaseSampler_pfn(p0); } +#undef clRetainCommandQueue +#define clRetainCommandQueue clRetainCommandQueue_fn +inline cl_int clRetainCommandQueue(cl_command_queue p0) { return clRetainCommandQueue_pfn(p0); } +#undef clRetainContext +#define clRetainContext clRetainContext_fn +inline cl_int clRetainContext(cl_context p0) { return clRetainContext_pfn(p0); } +#undef clRetainDevice +#define clRetainDevice clRetainDevice_fn +inline cl_int clRetainDevice(cl_device_id p0) { return clRetainDevice_pfn(p0); } +#undef clRetainEvent +#define clRetainEvent clRetainEvent_fn +inline cl_int clRetainEvent(cl_event p0) { return clRetainEvent_pfn(p0); } +#undef clRetainKernel +#define clRetainKernel clRetainKernel_fn +inline cl_int clRetainKernel(cl_kernel p0) { return clRetainKernel_pfn(p0); } +#undef clRetainMemObject +#define clRetainMemObject clRetainMemObject_fn +inline cl_int clRetainMemObject(cl_mem p0) { return clRetainMemObject_pfn(p0); } +#undef clRetainProgram +#define clRetainProgram clRetainProgram_fn +inline cl_int clRetainProgram(cl_program p0) { return clRetainProgram_pfn(p0); } +#undef clRetainSampler +#define clRetainSampler clRetainSampler_fn +inline cl_int clRetainSampler(cl_sampler p0) { return clRetainSampler_pfn(p0); } +#undef clSetEventCallback +#define clSetEventCallback clSetEventCallback_fn +inline cl_int clSetEventCallback(cl_event p0, cl_int p1, void (CL_CALLBACK*p2) (cl_event, cl_int, void*), void* p3) { return clSetEventCallback_pfn(p0, p1, p2, p3); } +#undef clSetKernelArg +#define clSetKernelArg clSetKernelArg_fn +inline cl_int clSetKernelArg(cl_kernel p0, cl_uint p1, size_t p2, const void* p3) { return clSetKernelArg_pfn(p0, p1, p2, p3); } +#undef clSetMemObjectDestructorCallback +#define clSetMemObjectDestructorCallback clSetMemObjectDestructorCallback_fn +inline cl_int clSetMemObjectDestructorCallback(cl_mem p0, void (CL_CALLBACK*p1) (cl_mem, void*), void* p2) { return clSetMemObjectDestructorCallback_pfn(p0, p1, p2); } +#undef clSetUserEventStatus +#define clSetUserEventStatus clSetUserEventStatus_fn +inline cl_int clSetUserEventStatus(cl_event p0, cl_int p1) { return clSetUserEventStatus_pfn(p0, p1); } +#undef clUnloadCompiler +#define clUnloadCompiler clUnloadCompiler_fn +inline cl_int clUnloadCompiler() { return clUnloadCompiler_pfn(); } +#undef clUnloadPlatformCompiler +#define clUnloadPlatformCompiler clUnloadPlatformCompiler_fn +inline cl_int clUnloadPlatformCompiler(cl_platform_id p0) { return clUnloadPlatformCompiler_pfn(p0); } +#undef clWaitForEvents +#define clWaitForEvents clWaitForEvents_fn +inline cl_int clWaitForEvents(cl_uint p0, const cl_event* p1) { return clWaitForEvents_pfn(p0, p1); } diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/opencl/runtime/autogenerated/opencl_gl.hpp b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/opencl/runtime/autogenerated/opencl_gl.hpp new file mode 100644 index 0000000..0b12aed --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/opencl/runtime/autogenerated/opencl_gl.hpp @@ -0,0 +1,62 @@ +// +// AUTOGENERATED, DO NOT EDIT +// +#ifndef OPENCV_CORE_OCL_RUNTIME_OPENCL_GL_HPP +#error "Invalid usage" +#endif + +// generated by parser_cl.py +#define clCreateFromGLBuffer clCreateFromGLBuffer_ +#define clCreateFromGLRenderbuffer clCreateFromGLRenderbuffer_ +#define clCreateFromGLTexture clCreateFromGLTexture_ +#define clCreateFromGLTexture2D clCreateFromGLTexture2D_ +#define clCreateFromGLTexture3D clCreateFromGLTexture3D_ +#define clEnqueueAcquireGLObjects clEnqueueAcquireGLObjects_ +#define clEnqueueReleaseGLObjects clEnqueueReleaseGLObjects_ +#define clGetGLContextInfoKHR clGetGLContextInfoKHR_ +#define clGetGLObjectInfo clGetGLObjectInfo_ +#define clGetGLTextureInfo clGetGLTextureInfo_ + +#if defined __APPLE__ +#include +#else +#include +#endif + +// generated by parser_cl.py +#undef clCreateFromGLBuffer +#define clCreateFromGLBuffer clCreateFromGLBuffer_pfn +#undef clCreateFromGLRenderbuffer +#define clCreateFromGLRenderbuffer clCreateFromGLRenderbuffer_pfn +#undef clCreateFromGLTexture +#define clCreateFromGLTexture clCreateFromGLTexture_pfn +#undef clCreateFromGLTexture2D +#define clCreateFromGLTexture2D clCreateFromGLTexture2D_pfn +#undef clCreateFromGLTexture3D +#define clCreateFromGLTexture3D clCreateFromGLTexture3D_pfn +#undef clEnqueueAcquireGLObjects +#define clEnqueueAcquireGLObjects clEnqueueAcquireGLObjects_pfn +#undef clEnqueueReleaseGLObjects +#define clEnqueueReleaseGLObjects clEnqueueReleaseGLObjects_pfn +#undef clGetGLContextInfoKHR +#define clGetGLContextInfoKHR clGetGLContextInfoKHR_pfn +#undef clGetGLObjectInfo +#define clGetGLObjectInfo clGetGLObjectInfo_pfn +#undef clGetGLTextureInfo +#define clGetGLTextureInfo clGetGLTextureInfo_pfn + +#ifdef cl_khr_gl_sharing + +// generated by parser_cl.py +extern CL_RUNTIME_EXPORT cl_mem (CL_API_CALL*clCreateFromGLBuffer)(cl_context, cl_mem_flags, cl_GLuint, int*); +extern CL_RUNTIME_EXPORT cl_mem (CL_API_CALL*clCreateFromGLRenderbuffer)(cl_context, cl_mem_flags, cl_GLuint, cl_int*); +extern CL_RUNTIME_EXPORT cl_mem (CL_API_CALL*clCreateFromGLTexture)(cl_context, cl_mem_flags, cl_GLenum, cl_GLint, cl_GLuint, cl_int*); +extern CL_RUNTIME_EXPORT cl_mem (CL_API_CALL*clCreateFromGLTexture2D)(cl_context, cl_mem_flags, cl_GLenum, cl_GLint, cl_GLuint, cl_int*); +extern CL_RUNTIME_EXPORT cl_mem (CL_API_CALL*clCreateFromGLTexture3D)(cl_context, cl_mem_flags, cl_GLenum, cl_GLint, cl_GLuint, cl_int*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueAcquireGLObjects)(cl_command_queue, cl_uint, const cl_mem*, cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueReleaseGLObjects)(cl_command_queue, cl_uint, const cl_mem*, cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetGLContextInfoKHR)(const cl_context_properties*, cl_gl_context_info, size_t, void*, size_t*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetGLObjectInfo)(cl_mem, cl_gl_object_type*, cl_GLuint*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetGLTextureInfo)(cl_mem, cl_gl_texture_info, size_t, void*, size_t*); + +#endif // cl_khr_gl_sharing diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/opencl/runtime/autogenerated/opencl_gl_wrappers.hpp b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/opencl/runtime/autogenerated/opencl_gl_wrappers.hpp new file mode 100644 index 0000000..12f342b --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/opencl/runtime/autogenerated/opencl_gl_wrappers.hpp @@ -0,0 +1,42 @@ +// +// AUTOGENERATED, DO NOT EDIT +// +#ifndef OPENCV_CORE_OCL_RUNTIME_OPENCL_GL_WRAPPERS_HPP +#error "Invalid usage" +#endif + +#ifdef cl_khr_gl_sharing + +// generated by parser_cl.py +#undef clCreateFromGLBuffer +#define clCreateFromGLBuffer clCreateFromGLBuffer_fn +inline cl_mem clCreateFromGLBuffer(cl_context p0, cl_mem_flags p1, cl_GLuint p2, int* p3) { return clCreateFromGLBuffer_pfn(p0, p1, p2, p3); } +#undef clCreateFromGLRenderbuffer +#define clCreateFromGLRenderbuffer clCreateFromGLRenderbuffer_fn +inline cl_mem clCreateFromGLRenderbuffer(cl_context p0, cl_mem_flags p1, cl_GLuint p2, cl_int* p3) { return clCreateFromGLRenderbuffer_pfn(p0, p1, p2, p3); } +#undef clCreateFromGLTexture +#define clCreateFromGLTexture clCreateFromGLTexture_fn +inline cl_mem clCreateFromGLTexture(cl_context p0, cl_mem_flags p1, cl_GLenum p2, cl_GLint p3, cl_GLuint p4, cl_int* p5) { return clCreateFromGLTexture_pfn(p0, p1, p2, p3, p4, p5); } +#undef clCreateFromGLTexture2D +#define clCreateFromGLTexture2D clCreateFromGLTexture2D_fn +inline cl_mem clCreateFromGLTexture2D(cl_context p0, cl_mem_flags p1, cl_GLenum p2, cl_GLint p3, cl_GLuint p4, cl_int* p5) { return clCreateFromGLTexture2D_pfn(p0, p1, p2, p3, p4, p5); } +#undef clCreateFromGLTexture3D +#define clCreateFromGLTexture3D clCreateFromGLTexture3D_fn +inline cl_mem clCreateFromGLTexture3D(cl_context p0, cl_mem_flags p1, cl_GLenum p2, cl_GLint p3, cl_GLuint p4, cl_int* p5) { return clCreateFromGLTexture3D_pfn(p0, p1, p2, p3, p4, p5); } +#undef clEnqueueAcquireGLObjects +#define clEnqueueAcquireGLObjects clEnqueueAcquireGLObjects_fn +inline cl_int clEnqueueAcquireGLObjects(cl_command_queue p0, cl_uint p1, const cl_mem* p2, cl_uint p3, const cl_event* p4, cl_event* p5) { return clEnqueueAcquireGLObjects_pfn(p0, p1, p2, p3, p4, p5); } +#undef clEnqueueReleaseGLObjects +#define clEnqueueReleaseGLObjects clEnqueueReleaseGLObjects_fn +inline cl_int clEnqueueReleaseGLObjects(cl_command_queue p0, cl_uint p1, const cl_mem* p2, cl_uint p3, const cl_event* p4, cl_event* p5) { return clEnqueueReleaseGLObjects_pfn(p0, p1, p2, p3, p4, p5); } +#undef clGetGLContextInfoKHR +#define clGetGLContextInfoKHR clGetGLContextInfoKHR_fn +inline cl_int clGetGLContextInfoKHR(const cl_context_properties* p0, cl_gl_context_info p1, size_t p2, void* p3, size_t* p4) { return clGetGLContextInfoKHR_pfn(p0, p1, p2, p3, p4); } +#undef clGetGLObjectInfo +#define clGetGLObjectInfo clGetGLObjectInfo_fn +inline cl_int clGetGLObjectInfo(cl_mem p0, cl_gl_object_type* p1, cl_GLuint* p2) { return clGetGLObjectInfo_pfn(p0, p1, p2); } +#undef clGetGLTextureInfo +#define clGetGLTextureInfo clGetGLTextureInfo_fn +inline cl_int clGetGLTextureInfo(cl_mem p0, cl_gl_texture_info p1, size_t p2, void* p3, size_t* p4) { return clGetGLTextureInfo_pfn(p0, p1, p2, p3, p4); } + +#endif // cl_khr_gl_sharing diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/opencl/runtime/opencl_clamdblas.hpp b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/opencl/runtime/opencl_clamdblas.hpp new file mode 100644 index 0000000..2ad8ac0 --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/opencl/runtime/opencl_clamdblas.hpp @@ -0,0 +1,53 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2013, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the OpenCV Foundation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef OPENCV_CORE_OCL_RUNTIME_CLAMDBLAS_HPP +#define OPENCV_CORE_OCL_RUNTIME_CLAMDBLAS_HPP + +#ifdef HAVE_CLAMDBLAS + +#include "opencl_core.hpp" + +#include "autogenerated/opencl_clamdblas.hpp" + +#endif // HAVE_CLAMDBLAS + +#endif // OPENCV_CORE_OCL_RUNTIME_CLAMDBLAS_HPP diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/opencl/runtime/opencl_clamdfft.hpp b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/opencl/runtime/opencl_clamdfft.hpp new file mode 100644 index 0000000..a328f72 --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/opencl/runtime/opencl_clamdfft.hpp @@ -0,0 +1,53 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2013, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the OpenCV Foundation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef OPENCV_CORE_OCL_RUNTIME_CLAMDFFT_HPP +#define OPENCV_CORE_OCL_RUNTIME_CLAMDFFT_HPP + +#ifdef HAVE_CLAMDFFT + +#include "opencl_core.hpp" + +#include "autogenerated/opencl_clamdfft.hpp" + +#endif // HAVE_CLAMDFFT + +#endif // OPENCV_CORE_OCL_RUNTIME_CLAMDFFT_HPP diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/opencl/runtime/opencl_core.hpp b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/opencl/runtime/opencl_core.hpp new file mode 100644 index 0000000..0404b31 --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/opencl/runtime/opencl_core.hpp @@ -0,0 +1,84 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2013, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the OpenCV Foundation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef OPENCV_CORE_OCL_RUNTIME_OPENCL_CORE_HPP +#define OPENCV_CORE_OCL_RUNTIME_OPENCL_CORE_HPP + +#ifdef HAVE_OPENCL + +#ifndef CL_RUNTIME_EXPORT +#if (defined(BUILD_SHARED_LIBS) || defined(OPENCV_CORE_SHARED)) && (defined _WIN32 || defined WINCE) && \ + !(defined(__OPENCV_BUILD) && defined(OPENCV_MODULE_IS_PART_OF_WORLD)) +#define CL_RUNTIME_EXPORT __declspec(dllimport) +#else +#define CL_RUNTIME_EXPORT +#endif +#endif + +#ifdef HAVE_OPENCL_SVM +#define clSVMAlloc clSVMAlloc_ +#define clSVMFree clSVMFree_ +#define clSetKernelArgSVMPointer clSetKernelArgSVMPointer_ +#define clSetKernelExecInfo clSetKernelExecInfo_ +#define clEnqueueSVMFree clEnqueueSVMFree_ +#define clEnqueueSVMMemcpy clEnqueueSVMMemcpy_ +#define clEnqueueSVMMemFill clEnqueueSVMMemFill_ +#define clEnqueueSVMMap clEnqueueSVMMap_ +#define clEnqueueSVMUnmap clEnqueueSVMUnmap_ +#endif + +#include "autogenerated/opencl_core.hpp" + +#ifndef CL_DEVICE_DOUBLE_FP_CONFIG +#define CL_DEVICE_DOUBLE_FP_CONFIG 0x1032 +#endif + +#ifndef CL_DEVICE_HALF_FP_CONFIG +#define CL_DEVICE_HALF_FP_CONFIG 0x1033 +#endif + +#ifndef CL_VERSION_1_2 +#define CV_REQUIRE_OPENCL_1_2_ERROR CV_Error(cv::Error::OpenCLApiCallError, "OpenCV compiled without OpenCL v1.2 support, so we can't use functionality from OpenCL v1.2") +#endif + +#endif // HAVE_OPENCL + +#endif // OPENCV_CORE_OCL_RUNTIME_OPENCL_CORE_HPP diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/opencl/runtime/opencl_core_wrappers.hpp b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/opencl/runtime/opencl_core_wrappers.hpp new file mode 100644 index 0000000..38fcae9 --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/opencl/runtime/opencl_core_wrappers.hpp @@ -0,0 +1,47 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2013, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the OpenCV Foundation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef OPENCV_CORE_OCL_RUNTIME_OPENCL_WRAPPERS_HPP +#define OPENCV_CORE_OCL_RUNTIME_OPENCL_WRAPPERS_HPP + +#include "autogenerated/opencl_core_wrappers.hpp" + +#endif // OPENCV_CORE_OCL_RUNTIME_OPENCL_WRAPPERS_HPP diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/opencl/runtime/opencl_gl.hpp b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/opencl/runtime/opencl_gl.hpp new file mode 100644 index 0000000..659c7d8 --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/opencl/runtime/opencl_gl.hpp @@ -0,0 +1,53 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2013, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the OpenCV Foundation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef OPENCV_CORE_OCL_RUNTIME_OPENCL_GL_HPP +#define OPENCV_CORE_OCL_RUNTIME_OPENCL_GL_HPP + +#if defined HAVE_OPENCL && defined HAVE_OPENGL + +#include "opencl_core.hpp" + +#include "autogenerated/opencl_gl.hpp" + +#endif // defined HAVE_OPENCL && defined HAVE_OPENGL + +#endif // OPENCV_CORE_OCL_RUNTIME_OPENCL_GL_HPP diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/opencl/runtime/opencl_gl_wrappers.hpp b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/opencl/runtime/opencl_gl_wrappers.hpp new file mode 100644 index 0000000..9700004 --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/opencl/runtime/opencl_gl_wrappers.hpp @@ -0,0 +1,47 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2013, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the OpenCV Foundation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef OPENCV_CORE_OCL_RUNTIME_OPENCL_GL_WRAPPERS_HPP +#define OPENCV_CORE_OCL_RUNTIME_OPENCL_GL_WRAPPERS_HPP + +#include "autogenerated/opencl_gl_wrappers.hpp" + +#endif // OPENCV_CORE_OCL_RUNTIME_OPENCL_GL_WRAPPERS_HPP diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/opencl/runtime/opencl_svm_20.hpp b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/opencl/runtime/opencl_svm_20.hpp new file mode 100644 index 0000000..9636b19 --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/opencl/runtime/opencl_svm_20.hpp @@ -0,0 +1,48 @@ +/* See LICENSE file in the root OpenCV directory */ + +#ifndef OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_2_0_HPP +#define OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_2_0_HPP + +#if defined(HAVE_OPENCL_SVM) +#include "opencl_core.hpp" + +#include "opencl_svm_definitions.hpp" + +#undef clSVMAlloc +#define clSVMAlloc clSVMAlloc_pfn +#undef clSVMFree +#define clSVMFree clSVMFree_pfn +#undef clSetKernelArgSVMPointer +#define clSetKernelArgSVMPointer clSetKernelArgSVMPointer_pfn +#undef clSetKernelExecInfo +//#define clSetKernelExecInfo clSetKernelExecInfo_pfn +#undef clEnqueueSVMFree +//#define clEnqueueSVMFree clEnqueueSVMFree_pfn +#undef clEnqueueSVMMemcpy +#define clEnqueueSVMMemcpy clEnqueueSVMMemcpy_pfn +#undef clEnqueueSVMMemFill +#define clEnqueueSVMMemFill clEnqueueSVMMemFill_pfn +#undef clEnqueueSVMMap +#define clEnqueueSVMMap clEnqueueSVMMap_pfn +#undef clEnqueueSVMUnmap +#define clEnqueueSVMUnmap clEnqueueSVMUnmap_pfn + +extern CL_RUNTIME_EXPORT void* (CL_API_CALL *clSVMAlloc)(cl_context context, cl_svm_mem_flags flags, size_t size, unsigned int alignment); +extern CL_RUNTIME_EXPORT void (CL_API_CALL *clSVMFree)(cl_context context, void* svm_pointer); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL *clSetKernelArgSVMPointer)(cl_kernel kernel, cl_uint arg_index, const void* arg_value); +//extern CL_RUNTIME_EXPORT void* (CL_API_CALL *clSetKernelExecInfo)(cl_kernel kernel, cl_kernel_exec_info param_name, size_t param_value_size, const void* param_value); +//extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL *clEnqueueSVMFree)(cl_command_queue command_queue, cl_uint num_svm_pointers, void* svm_pointers[], +// void (CL_CALLBACK *pfn_free_func)(cl_command_queue queue, cl_uint num_svm_pointers, void* svm_pointers[], void* user_data), void* user_data, +// cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL *clEnqueueSVMMemcpy)(cl_command_queue command_queue, cl_bool blocking_copy, void* dst_ptr, const void* src_ptr, size_t size, + cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL *clEnqueueSVMMemFill)(cl_command_queue command_queue, void* svm_ptr, const void* pattern, size_t pattern_size, size_t size, + cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL *clEnqueueSVMMap)(cl_command_queue command_queue, cl_bool blocking_map, cl_map_flags map_flags, void* svm_ptr, size_t size, + cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL *clEnqueueSVMUnmap)(cl_command_queue command_queue, void* svm_ptr, + cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event); + +#endif // HAVE_OPENCL_SVM + +#endif // OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_2_0_HPP diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/opencl/runtime/opencl_svm_definitions.hpp b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/opencl/runtime/opencl_svm_definitions.hpp new file mode 100644 index 0000000..97c927b --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/opencl/runtime/opencl_svm_definitions.hpp @@ -0,0 +1,42 @@ +/* See LICENSE file in the root OpenCV directory */ + +#ifndef OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_DEFINITIONS_HPP +#define OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_DEFINITIONS_HPP + +#if defined(HAVE_OPENCL_SVM) +#if defined(CL_VERSION_2_0) + +// OpenCL 2.0 contains SVM definitions + +#else + +typedef cl_bitfield cl_device_svm_capabilities; +typedef cl_bitfield cl_svm_mem_flags; +typedef cl_uint cl_kernel_exec_info; + +// +// TODO Add real values after OpenCL 2.0 release +// + +#ifndef CL_DEVICE_SVM_CAPABILITIES +#define CL_DEVICE_SVM_CAPABILITIES 0x1053 + +#define CL_DEVICE_SVM_COARSE_GRAIN_BUFFER (1 << 0) +#define CL_DEVICE_SVM_FINE_GRAIN_BUFFER (1 << 1) +#define CL_DEVICE_SVM_FINE_GRAIN_SYSTEM (1 << 2) +#define CL_DEVICE_SVM_ATOMICS (1 << 3) +#endif + +#ifndef CL_MEM_SVM_FINE_GRAIN_BUFFER +#define CL_MEM_SVM_FINE_GRAIN_BUFFER (1 << 10) +#endif + +#ifndef CL_MEM_SVM_ATOMICS +#define CL_MEM_SVM_ATOMICS (1 << 11) +#endif + + +#endif // CL_VERSION_2_0 +#endif // HAVE_OPENCL_SVM + +#endif // OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_DEFINITIONS_HPP diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/opencl/runtime/opencl_svm_hsa_extension.hpp b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/opencl/runtime/opencl_svm_hsa_extension.hpp new file mode 100644 index 0000000..497bc3d --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/opencl/runtime/opencl_svm_hsa_extension.hpp @@ -0,0 +1,166 @@ +/* See LICENSE file in the root OpenCV directory */ + +#ifndef OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_HSA_EXTENSION_HPP +#define OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_HSA_EXTENSION_HPP + +#if defined(HAVE_OPENCL_SVM) +#include "opencl_core.hpp" + +#ifndef CL_DEVICE_SVM_CAPABILITIES_AMD +// +// Part of the file is an extract from the cl_ext.h file from AMD APP SDK package. +// Below is the original copyright. +// +/******************************************************************************* + * Copyright (c) 2008-2013 The Khronos Group Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and/or associated documentation files (the + * "Materials"), to deal in the Materials without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Materials, and to + * permit persons to whom the Materials are furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Materials. + * + * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. + ******************************************************************************/ + +/******************************************* + * Shared Virtual Memory (SVM) extension + *******************************************/ +typedef cl_bitfield cl_device_svm_capabilities_amd; +typedef cl_bitfield cl_svm_mem_flags_amd; +typedef cl_uint cl_kernel_exec_info_amd; + +/* cl_device_info */ +#define CL_DEVICE_SVM_CAPABILITIES_AMD 0x1053 +#define CL_DEVICE_PREFERRED_PLATFORM_ATOMIC_ALIGNMENT_AMD 0x1054 + +/* cl_device_svm_capabilities_amd */ +#define CL_DEVICE_SVM_COARSE_GRAIN_BUFFER_AMD (1 << 0) +#define CL_DEVICE_SVM_FINE_GRAIN_BUFFER_AMD (1 << 1) +#define CL_DEVICE_SVM_FINE_GRAIN_SYSTEM_AMD (1 << 2) +#define CL_DEVICE_SVM_ATOMICS_AMD (1 << 3) + +/* cl_svm_mem_flags_amd */ +#define CL_MEM_SVM_FINE_GRAIN_BUFFER_AMD (1 << 10) +#define CL_MEM_SVM_ATOMICS_AMD (1 << 11) + +/* cl_mem_info */ +#define CL_MEM_USES_SVM_POINTER_AMD 0x1109 + +/* cl_kernel_exec_info_amd */ +#define CL_KERNEL_EXEC_INFO_SVM_PTRS_AMD 0x11B6 +#define CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM_AMD 0x11B7 + +/* cl_command_type */ +#define CL_COMMAND_SVM_FREE_AMD 0x1209 +#define CL_COMMAND_SVM_MEMCPY_AMD 0x120A +#define CL_COMMAND_SVM_MEMFILL_AMD 0x120B +#define CL_COMMAND_SVM_MAP_AMD 0x120C +#define CL_COMMAND_SVM_UNMAP_AMD 0x120D + +typedef CL_API_ENTRY void* +(CL_API_CALL * clSVMAllocAMD_fn)( + cl_context /* context */, + cl_svm_mem_flags_amd /* flags */, + size_t /* size */, + unsigned int /* alignment */ +) CL_EXT_SUFFIX__VERSION_1_2; + +typedef CL_API_ENTRY void +(CL_API_CALL * clSVMFreeAMD_fn)( + cl_context /* context */, + void* /* svm_pointer */ +) CL_EXT_SUFFIX__VERSION_1_2; + +typedef CL_API_ENTRY cl_int +(CL_API_CALL * clEnqueueSVMFreeAMD_fn)( + cl_command_queue /* command_queue */, + cl_uint /* num_svm_pointers */, + void** /* svm_pointers */, + void (CL_CALLBACK *)( /*pfn_free_func*/ + cl_command_queue /* queue */, + cl_uint /* num_svm_pointers */, + void** /* svm_pointers */, + void* /* user_data */), + void* /* user_data */, + cl_uint /* num_events_in_wait_list */, + const cl_event* /* event_wait_list */, + cl_event* /* event */ +) CL_EXT_SUFFIX__VERSION_1_2; + +typedef CL_API_ENTRY cl_int +(CL_API_CALL * clEnqueueSVMMemcpyAMD_fn)( + cl_command_queue /* command_queue */, + cl_bool /* blocking_copy */, + void* /* dst_ptr */, + const void* /* src_ptr */, + size_t /* size */, + cl_uint /* num_events_in_wait_list */, + const cl_event* /* event_wait_list */, + cl_event* /* event */ +) CL_EXT_SUFFIX__VERSION_1_2; + +typedef CL_API_ENTRY cl_int +(CL_API_CALL * clEnqueueSVMMemFillAMD_fn)( + cl_command_queue /* command_queue */, + void* /* svm_ptr */, + const void* /* pattern */, + size_t /* pattern_size */, + size_t /* size */, + cl_uint /* num_events_in_wait_list */, + const cl_event* /* event_wait_list */, + cl_event* /* event */ +) CL_EXT_SUFFIX__VERSION_1_2; + +typedef CL_API_ENTRY cl_int +(CL_API_CALL * clEnqueueSVMMapAMD_fn)( + cl_command_queue /* command_queue */, + cl_bool /* blocking_map */, + cl_map_flags /* map_flags */, + void* /* svm_ptr */, + size_t /* size */, + cl_uint /* num_events_in_wait_list */, + const cl_event* /* event_wait_list */, + cl_event* /* event */ +) CL_EXT_SUFFIX__VERSION_1_2; + +typedef CL_API_ENTRY cl_int +(CL_API_CALL * clEnqueueSVMUnmapAMD_fn)( + cl_command_queue /* command_queue */, + void* /* svm_ptr */, + cl_uint /* num_events_in_wait_list */, + const cl_event* /* event_wait_list */, + cl_event* /* event */ +) CL_EXT_SUFFIX__VERSION_1_2; + +typedef CL_API_ENTRY cl_int +(CL_API_CALL * clSetKernelArgSVMPointerAMD_fn)( + cl_kernel /* kernel */, + cl_uint /* arg_index */, + const void * /* arg_value */ +) CL_EXT_SUFFIX__VERSION_1_2; + +typedef CL_API_ENTRY cl_int +(CL_API_CALL * clSetKernelExecInfoAMD_fn)( + cl_kernel /* kernel */, + cl_kernel_exec_info_amd /* param_name */, + size_t /* param_value_size */, + const void * /* param_value */ +) CL_EXT_SUFFIX__VERSION_1_2; + +#endif + +#endif // HAVE_OPENCL_SVM + +#endif // OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_HSA_EXTENSION_HPP diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/opengl.hpp b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/opengl.hpp new file mode 100644 index 0000000..a311ce2 --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/opengl.hpp @@ -0,0 +1,725 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef OPENCV_CORE_OPENGL_HPP +#define OPENCV_CORE_OPENGL_HPP + +#ifndef __cplusplus +# error opengl.hpp header must be compiled as C++ +#endif + +#include "opencv2/core.hpp" +#include "ocl.hpp" + +namespace cv { namespace ogl { + +/** @addtogroup core_opengl +This section describes OpenGL interoperability. + +To enable OpenGL support, configure OpenCV using CMake with WITH_OPENGL=ON . Currently OpenGL is +supported only with WIN32, GTK and Qt backends on Windows and Linux (MacOS and Android are not +supported). For GTK backend gtkglext-1.0 library is required. + +To use OpenGL functionality you should first create OpenGL context (window or frame buffer). You can +do this with namedWindow function or with other OpenGL toolkit (GLUT, for example). +*/ +//! @{ + +/////////////////// OpenGL Objects /////////////////// + +/** @brief Smart pointer for OpenGL buffer object with reference counting. + +Buffer Objects are OpenGL objects that store an array of unformatted memory allocated by the OpenGL +context. These can be used to store vertex data, pixel data retrieved from images or the +framebuffer, and a variety of other things. + +ogl::Buffer has interface similar with Mat interface and represents 2D array memory. + +ogl::Buffer supports memory transfers between host and device and also can be mapped to CUDA memory. + */ +class CV_EXPORTS Buffer +{ +public: + /** @brief The target defines how you intend to use the buffer object. + */ + enum Target + { + ARRAY_BUFFER = 0x8892, //!< The buffer will be used as a source for vertex data + ELEMENT_ARRAY_BUFFER = 0x8893, //!< The buffer will be used for indices (in glDrawElements, for example) + PIXEL_PACK_BUFFER = 0x88EB, //!< The buffer will be used for reading from OpenGL textures + PIXEL_UNPACK_BUFFER = 0x88EC //!< The buffer will be used for writing to OpenGL textures + }; + + enum Access + { + READ_ONLY = 0x88B8, + WRITE_ONLY = 0x88B9, + READ_WRITE = 0x88BA + }; + + /** @brief The constructors. + + Creates empty ogl::Buffer object, creates ogl::Buffer object from existed buffer ( abufId + parameter), allocates memory for ogl::Buffer object or copies from host/device memory. + */ + Buffer(); + + /** @overload + @param arows Number of rows in a 2D array. + @param acols Number of columns in a 2D array. + @param atype Array type ( CV_8UC1, ..., CV_64FC4 ). See Mat for details. + @param abufId Buffer object name. + @param autoRelease Auto release mode (if true, release will be called in object's destructor). + */ + Buffer(int arows, int acols, int atype, unsigned int abufId, bool autoRelease = false); + + /** @overload + @param asize 2D array size. + @param atype Array type ( CV_8UC1, ..., CV_64FC4 ). See Mat for details. + @param abufId Buffer object name. + @param autoRelease Auto release mode (if true, release will be called in object's destructor). + */ + Buffer(Size asize, int atype, unsigned int abufId, bool autoRelease = false); + + /** @overload + @param arows Number of rows in a 2D array. + @param acols Number of columns in a 2D array. + @param atype Array type ( CV_8UC1, ..., CV_64FC4 ). See Mat for details. + @param target Buffer usage. See cv::ogl::Buffer::Target . + @param autoRelease Auto release mode (if true, release will be called in object's destructor). + */ + Buffer(int arows, int acols, int atype, Target target = ARRAY_BUFFER, bool autoRelease = false); + + /** @overload + @param asize 2D array size. + @param atype Array type ( CV_8UC1, ..., CV_64FC4 ). See Mat for details. + @param target Buffer usage. See cv::ogl::Buffer::Target . + @param autoRelease Auto release mode (if true, release will be called in object's destructor). + */ + Buffer(Size asize, int atype, Target target = ARRAY_BUFFER, bool autoRelease = false); + + /** @overload + @param arr Input array (host or device memory, it can be Mat , cuda::GpuMat or std::vector ). + @param target Buffer usage. See cv::ogl::Buffer::Target . + @param autoRelease Auto release mode (if true, release will be called in object's destructor). + */ + explicit Buffer(InputArray arr, Target target = ARRAY_BUFFER, bool autoRelease = false); + + /** @brief Allocates memory for ogl::Buffer object. + + @param arows Number of rows in a 2D array. + @param acols Number of columns in a 2D array. + @param atype Array type ( CV_8UC1, ..., CV_64FC4 ). See Mat for details. + @param target Buffer usage. See cv::ogl::Buffer::Target . + @param autoRelease Auto release mode (if true, release will be called in object's destructor). + */ + void create(int arows, int acols, int atype, Target target = ARRAY_BUFFER, bool autoRelease = false); + + /** @overload + @param asize 2D array size. + @param atype Array type ( CV_8UC1, ..., CV_64FC4 ). See Mat for details. + @param target Buffer usage. See cv::ogl::Buffer::Target . + @param autoRelease Auto release mode (if true, release will be called in object's destructor). + */ + void create(Size asize, int atype, Target target = ARRAY_BUFFER, bool autoRelease = false); + + /** @brief Decrements the reference counter and destroys the buffer object if needed. + + The function will call setAutoRelease(true) . + */ + void release(); + + /** @brief Sets auto release mode. + + The lifetime of the OpenGL object is tied to the lifetime of the context. If OpenGL context was + bound to a window it could be released at any time (user can close a window). If object's destructor + is called after destruction of the context it will cause an error. Thus ogl::Buffer doesn't destroy + OpenGL object in destructor by default (all OpenGL resources will be released with OpenGL context). + This function can force ogl::Buffer destructor to destroy OpenGL object. + @param flag Auto release mode (if true, release will be called in object's destructor). + */ + void setAutoRelease(bool flag); + + /** @brief Copies from host/device memory to OpenGL buffer. + @param arr Input array (host or device memory, it can be Mat , cuda::GpuMat or std::vector ). + @param target Buffer usage. See cv::ogl::Buffer::Target . + @param autoRelease Auto release mode (if true, release will be called in object's destructor). + */ + void copyFrom(InputArray arr, Target target = ARRAY_BUFFER, bool autoRelease = false); + + /** @overload */ + void copyFrom(InputArray arr, cuda::Stream& stream, Target target = ARRAY_BUFFER, bool autoRelease = false); + + /** @brief Copies from OpenGL buffer to host/device memory or another OpenGL buffer object. + + @param arr Destination array (host or device memory, can be Mat , cuda::GpuMat , std::vector or + ogl::Buffer ). + */ + void copyTo(OutputArray arr) const; + + /** @overload */ + void copyTo(OutputArray arr, cuda::Stream& stream) const; + + /** @brief Creates a full copy of the buffer object and the underlying data. + + @param target Buffer usage for destination buffer. + @param autoRelease Auto release mode for destination buffer. + */ + Buffer clone(Target target = ARRAY_BUFFER, bool autoRelease = false) const; + + /** @brief Binds OpenGL buffer to the specified buffer binding point. + + @param target Binding point. See cv::ogl::Buffer::Target . + */ + void bind(Target target) const; + + /** @brief Unbind any buffers from the specified binding point. + + @param target Binding point. See cv::ogl::Buffer::Target . + */ + static void unbind(Target target); + + /** @brief Maps OpenGL buffer to host memory. + + mapHost maps to the client's address space the entire data store of the buffer object. The data can + then be directly read and/or written relative to the returned pointer, depending on the specified + access policy. + + A mapped data store must be unmapped with ogl::Buffer::unmapHost before its buffer object is used. + + This operation can lead to memory transfers between host and device. + + Only one buffer object can be mapped at a time. + @param access Access policy, indicating whether it will be possible to read from, write to, or both + read from and write to the buffer object's mapped data store. The symbolic constant must be + ogl::Buffer::READ_ONLY , ogl::Buffer::WRITE_ONLY or ogl::Buffer::READ_WRITE . + */ + Mat mapHost(Access access); + + /** @brief Unmaps OpenGL buffer. + */ + void unmapHost(); + + //! map to device memory (blocking) + cuda::GpuMat mapDevice(); + void unmapDevice(); + + /** @brief Maps OpenGL buffer to CUDA device memory. + + This operation doesn't copy data. Several buffer objects can be mapped to CUDA memory at a time. + + A mapped data store must be unmapped with ogl::Buffer::unmapDevice before its buffer object is used. + */ + cuda::GpuMat mapDevice(cuda::Stream& stream); + + /** @brief Unmaps OpenGL buffer. + */ + void unmapDevice(cuda::Stream& stream); + + int rows() const; + int cols() const; + Size size() const; + bool empty() const; + + int type() const; + int depth() const; + int channels() const; + int elemSize() const; + int elemSize1() const; + + //! get OpenGL opject id + unsigned int bufId() const; + + class Impl; + +private: + Ptr impl_; + int rows_; + int cols_; + int type_; +}; + +/** @brief Smart pointer for OpenGL 2D texture memory with reference counting. + */ +class CV_EXPORTS Texture2D +{ +public: + /** @brief An Image Format describes the way that the images in Textures store their data. + */ + enum Format + { + NONE = 0, + DEPTH_COMPONENT = 0x1902, //!< Depth + RGB = 0x1907, //!< Red, Green, Blue + RGBA = 0x1908 //!< Red, Green, Blue, Alpha + }; + + /** @brief The constructors. + + Creates empty ogl::Texture2D object, allocates memory for ogl::Texture2D object or copies from + host/device memory. + */ + Texture2D(); + + /** @overload */ + Texture2D(int arows, int acols, Format aformat, unsigned int atexId, bool autoRelease = false); + + /** @overload */ + Texture2D(Size asize, Format aformat, unsigned int atexId, bool autoRelease = false); + + /** @overload + @param arows Number of rows. + @param acols Number of columns. + @param aformat Image format. See cv::ogl::Texture2D::Format . + @param autoRelease Auto release mode (if true, release will be called in object's destructor). + */ + Texture2D(int arows, int acols, Format aformat, bool autoRelease = false); + + /** @overload + @param asize 2D array size. + @param aformat Image format. See cv::ogl::Texture2D::Format . + @param autoRelease Auto release mode (if true, release will be called in object's destructor). + */ + Texture2D(Size asize, Format aformat, bool autoRelease = false); + + /** @overload + @param arr Input array (host or device memory, it can be Mat , cuda::GpuMat or ogl::Buffer ). + @param autoRelease Auto release mode (if true, release will be called in object's destructor). + */ + explicit Texture2D(InputArray arr, bool autoRelease = false); + + /** @brief Allocates memory for ogl::Texture2D object. + + @param arows Number of rows. + @param acols Number of columns. + @param aformat Image format. See cv::ogl::Texture2D::Format . + @param autoRelease Auto release mode (if true, release will be called in object's destructor). + */ + void create(int arows, int acols, Format aformat, bool autoRelease = false); + /** @overload + @param asize 2D array size. + @param aformat Image format. See cv::ogl::Texture2D::Format . + @param autoRelease Auto release mode (if true, release will be called in object's destructor). + */ + void create(Size asize, Format aformat, bool autoRelease = false); + + /** @brief Decrements the reference counter and destroys the texture object if needed. + + The function will call setAutoRelease(true) . + */ + void release(); + + /** @brief Sets auto release mode. + + @param flag Auto release mode (if true, release will be called in object's destructor). + + The lifetime of the OpenGL object is tied to the lifetime of the context. If OpenGL context was + bound to a window it could be released at any time (user can close a window). If object's destructor + is called after destruction of the context it will cause an error. Thus ogl::Texture2D doesn't + destroy OpenGL object in destructor by default (all OpenGL resources will be released with OpenGL + context). This function can force ogl::Texture2D destructor to destroy OpenGL object. + */ + void setAutoRelease(bool flag); + + /** @brief Copies from host/device memory to OpenGL texture. + + @param arr Input array (host or device memory, it can be Mat , cuda::GpuMat or ogl::Buffer ). + @param autoRelease Auto release mode (if true, release will be called in object's destructor). + */ + void copyFrom(InputArray arr, bool autoRelease = false); + + /** @brief Copies from OpenGL texture to host/device memory or another OpenGL texture object. + + @param arr Destination array (host or device memory, can be Mat , cuda::GpuMat , ogl::Buffer or + ogl::Texture2D ). + @param ddepth Destination depth. + @param autoRelease Auto release mode for destination buffer (if arr is OpenGL buffer or texture). + */ + void copyTo(OutputArray arr, int ddepth = CV_32F, bool autoRelease = false) const; + + /** @brief Binds texture to current active texture unit for GL_TEXTURE_2D target. + */ + void bind() const; + + int rows() const; + int cols() const; + Size size() const; + bool empty() const; + + Format format() const; + + //! get OpenGL opject id + unsigned int texId() const; + + class Impl; + +private: + Ptr impl_; + int rows_; + int cols_; + Format format_; +}; + +/** @brief Wrapper for OpenGL Client-Side Vertex arrays. + +ogl::Arrays stores vertex data in ogl::Buffer objects. + */ +class CV_EXPORTS Arrays +{ +public: + /** @brief Default constructor + */ + Arrays(); + + /** @brief Sets an array of vertex coordinates. + @param vertex array with vertex coordinates, can be both host and device memory. + */ + void setVertexArray(InputArray vertex); + + /** @brief Resets vertex coordinates. + */ + void resetVertexArray(); + + /** @brief Sets an array of vertex colors. + @param color array with vertex colors, can be both host and device memory. + */ + void setColorArray(InputArray color); + + /** @brief Resets vertex colors. + */ + void resetColorArray(); + + /** @brief Sets an array of vertex normals. + @param normal array with vertex normals, can be both host and device memory. + */ + void setNormalArray(InputArray normal); + + /** @brief Resets vertex normals. + */ + void resetNormalArray(); + + /** @brief Sets an array of vertex texture coordinates. + @param texCoord array with vertex texture coordinates, can be both host and device memory. + */ + void setTexCoordArray(InputArray texCoord); + + /** @brief Resets vertex texture coordinates. + */ + void resetTexCoordArray(); + + /** @brief Releases all inner buffers. + */ + void release(); + + /** @brief Sets auto release mode all inner buffers. + @param flag Auto release mode. + */ + void setAutoRelease(bool flag); + + /** @brief Binds all vertex arrays. + */ + void bind() const; + + /** @brief Returns the vertex count. + */ + int size() const; + bool empty() const; + +private: + int size_; + Buffer vertex_; + Buffer color_; + Buffer normal_; + Buffer texCoord_; +}; + +/////////////////// Render Functions /////////////////// + +//! render mode +enum RenderModes { + POINTS = 0x0000, + LINES = 0x0001, + LINE_LOOP = 0x0002, + LINE_STRIP = 0x0003, + TRIANGLES = 0x0004, + TRIANGLE_STRIP = 0x0005, + TRIANGLE_FAN = 0x0006, + QUADS = 0x0007, + QUAD_STRIP = 0x0008, + POLYGON = 0x0009 +}; + +/** @brief Render OpenGL texture or primitives. +@param tex Texture to draw. +@param wndRect Region of window, where to draw a texture (normalized coordinates). +@param texRect Region of texture to draw (normalized coordinates). + */ +CV_EXPORTS void render(const Texture2D& tex, + Rect_ wndRect = Rect_(0.0, 0.0, 1.0, 1.0), + Rect_ texRect = Rect_(0.0, 0.0, 1.0, 1.0)); + +/** @overload +@param arr Array of privitives vertices. +@param mode Render mode. One of cv::ogl::RenderModes +@param color Color for all vertices. Will be used if arr doesn't contain color array. +*/ +CV_EXPORTS void render(const Arrays& arr, int mode = POINTS, Scalar color = Scalar::all(255)); + +/** @overload +@param arr Array of privitives vertices. +@param indices Array of vertices indices (host or device memory). +@param mode Render mode. One of cv::ogl::RenderModes +@param color Color for all vertices. Will be used if arr doesn't contain color array. +*/ +CV_EXPORTS void render(const Arrays& arr, InputArray indices, int mode = POINTS, Scalar color = Scalar::all(255)); + +/////////////////// CL-GL Interoperability Functions /////////////////// + +namespace ocl { +using namespace cv::ocl; + +// TODO static functions in the Context class +/** @brief Creates OpenCL context from GL. +@return Returns reference to OpenCL Context + */ +CV_EXPORTS Context& initializeContextFromGL(); + +} // namespace cv::ogl::ocl + +/** @brief Converts InputArray to Texture2D object. +@param src - source InputArray. +@param texture - destination Texture2D object. + */ +CV_EXPORTS void convertToGLTexture2D(InputArray src, Texture2D& texture); + +/** @brief Converts Texture2D object to OutputArray. +@param texture - source Texture2D object. +@param dst - destination OutputArray. + */ +CV_EXPORTS void convertFromGLTexture2D(const Texture2D& texture, OutputArray dst); + +/** @brief Maps Buffer object to process on CL side (convert to UMat). + +Function creates CL buffer from GL one, and then constructs UMat that can be used +to process buffer data with OpenCV functions. Note that in current implementation +UMat constructed this way doesn't own corresponding GL buffer object, so it is +the user responsibility to close down CL/GL buffers relationships by explicitly +calling unmapGLBuffer() function. +@param buffer - source Buffer object. +@param accessFlags - data access flags (ACCESS_READ|ACCESS_WRITE). +@return Returns UMat object + */ +CV_EXPORTS UMat mapGLBuffer(const Buffer& buffer, AccessFlag accessFlags = ACCESS_READ | ACCESS_WRITE); + +/** @brief Unmaps Buffer object (releases UMat, previously mapped from Buffer). + +Function must be called explicitly by the user for each UMat previously constructed +by the call to mapGLBuffer() function. +@param u - source UMat, created by mapGLBuffer(). + */ +CV_EXPORTS void unmapGLBuffer(UMat& u); + +//! @} +}} // namespace cv::ogl + +namespace cv { namespace cuda { + +/** @brief Sets a CUDA device and initializes it for the current thread with OpenGL interoperability. + +This function should be explicitly called after OpenGL context creation and before any CUDA calls. +@param device System index of a CUDA device starting with 0. +@ingroup core_opengl + */ +CV_EXPORTS void setGlDevice(int device = 0); + +}} + +//! @cond IGNORED + +//////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////// + +inline +cv::ogl::Buffer::Buffer(int arows, int acols, int atype, Target target, bool autoRelease) : rows_(0), cols_(0), type_(0) +{ + create(arows, acols, atype, target, autoRelease); +} + +inline +cv::ogl::Buffer::Buffer(Size asize, int atype, Target target, bool autoRelease) : rows_(0), cols_(0), type_(0) +{ + create(asize, atype, target, autoRelease); +} + +inline +void cv::ogl::Buffer::create(Size asize, int atype, Target target, bool autoRelease) +{ + create(asize.height, asize.width, atype, target, autoRelease); +} + +inline +int cv::ogl::Buffer::rows() const +{ + return rows_; +} + +inline +int cv::ogl::Buffer::cols() const +{ + return cols_; +} + +inline +cv::Size cv::ogl::Buffer::size() const +{ + return Size(cols_, rows_); +} + +inline +bool cv::ogl::Buffer::empty() const +{ + return rows_ == 0 || cols_ == 0; +} + +inline +int cv::ogl::Buffer::type() const +{ + return type_; +} + +inline +int cv::ogl::Buffer::depth() const +{ + return CV_MAT_DEPTH(type_); +} + +inline +int cv::ogl::Buffer::channels() const +{ + return CV_MAT_CN(type_); +} + +inline +int cv::ogl::Buffer::elemSize() const +{ + return CV_ELEM_SIZE(type_); +} + +inline +int cv::ogl::Buffer::elemSize1() const +{ + return CV_ELEM_SIZE1(type_); +} + +/////// + +inline +cv::ogl::Texture2D::Texture2D(int arows, int acols, Format aformat, bool autoRelease) : rows_(0), cols_(0), format_(NONE) +{ + create(arows, acols, aformat, autoRelease); +} + +inline +cv::ogl::Texture2D::Texture2D(Size asize, Format aformat, bool autoRelease) : rows_(0), cols_(0), format_(NONE) +{ + create(asize, aformat, autoRelease); +} + +inline +void cv::ogl::Texture2D::create(Size asize, Format aformat, bool autoRelease) +{ + create(asize.height, asize.width, aformat, autoRelease); +} + +inline +int cv::ogl::Texture2D::rows() const +{ + return rows_; +} + +inline +int cv::ogl::Texture2D::cols() const +{ + return cols_; +} + +inline +cv::Size cv::ogl::Texture2D::size() const +{ + return Size(cols_, rows_); +} + +inline +bool cv::ogl::Texture2D::empty() const +{ + return rows_ == 0 || cols_ == 0; +} + +inline +cv::ogl::Texture2D::Format cv::ogl::Texture2D::format() const +{ + return format_; +} + +/////// + +inline +cv::ogl::Arrays::Arrays() : size_(0) +{ +} + +inline +int cv::ogl::Arrays::size() const +{ + return size_; +} + +inline +bool cv::ogl::Arrays::empty() const +{ + return size_ == 0; +} + +//! @endcond + +#endif /* OPENCV_CORE_OPENGL_HPP */ diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/operations.hpp b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/operations.hpp new file mode 100644 index 0000000..bde28c4 --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/operations.hpp @@ -0,0 +1,594 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Copyright (C) 2013, OpenCV Foundation, all rights reserved. +// Copyright (C) 2015, Itseez Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef OPENCV_CORE_OPERATIONS_HPP +#define OPENCV_CORE_OPERATIONS_HPP + +#ifndef __cplusplus +# error operations.hpp header must be compiled as C++ +#endif + +#include + +#if defined(__GNUC__) || defined(__clang__) // at least GCC 3.1+, clang 3.5+ +# if defined(__MINGW_PRINTF_FORMAT) // https://sourceforge.net/p/mingw-w64/wiki2/gnu%20printf/. +# define CV_FORMAT_PRINTF(string_idx, first_to_check) __attribute__ ((format (__MINGW_PRINTF_FORMAT, string_idx, first_to_check))) +# else +# define CV_FORMAT_PRINTF(string_idx, first_to_check) __attribute__ ((format (printf, string_idx, first_to_check))) +# endif +#else +# define CV_FORMAT_PRINTF(A, B) +#endif + +//! @cond IGNORED + +namespace cv +{ + +////////////////////////////// Matx methods depending on core API ///////////////////////////// + +namespace internal +{ + +template struct Matx_FastInvOp +{ + bool operator()(const Matx<_Tp, m, n>& a, Matx<_Tp, n, m>& b, int method) const + { + return invert(a, b, method) != 0; + } +}; + +template struct Matx_FastInvOp<_Tp, m, m> +{ + bool operator()(const Matx<_Tp, m, m>& a, Matx<_Tp, m, m>& b, int method) const + { + if (method == DECOMP_LU || method == DECOMP_CHOLESKY) + { + Matx<_Tp, m, m> temp = a; + + // assume that b is all 0's on input => make it a unity matrix + for (int i = 0; i < m; i++) + b(i, i) = (_Tp)1; + + if (method == DECOMP_CHOLESKY) + return Cholesky(temp.val, m*sizeof(_Tp), m, b.val, m*sizeof(_Tp), m); + + return LU(temp.val, m*sizeof(_Tp), m, b.val, m*sizeof(_Tp), m) != 0; + } + else + { + return invert(a, b, method) != 0; + } + } +}; + +template struct Matx_FastInvOp<_Tp, 2, 2> +{ + bool operator()(const Matx<_Tp, 2, 2>& a, Matx<_Tp, 2, 2>& b, int /*method*/) const + { + _Tp d = (_Tp)determinant(a); + if (d == 0) + return false; + d = 1/d; + b(1,1) = a(0,0)*d; + b(0,0) = a(1,1)*d; + b(0,1) = -a(0,1)*d; + b(1,0) = -a(1,0)*d; + return true; + } +}; + +template struct Matx_FastInvOp<_Tp, 3, 3> +{ + bool operator()(const Matx<_Tp, 3, 3>& a, Matx<_Tp, 3, 3>& b, int /*method*/) const + { + _Tp d = (_Tp)determinant(a); + if (d == 0) + return false; + d = 1/d; + b(0,0) = (a(1,1) * a(2,2) - a(1,2) * a(2,1)) * d; + b(0,1) = (a(0,2) * a(2,1) - a(0,1) * a(2,2)) * d; + b(0,2) = (a(0,1) * a(1,2) - a(0,2) * a(1,1)) * d; + + b(1,0) = (a(1,2) * a(2,0) - a(1,0) * a(2,2)) * d; + b(1,1) = (a(0,0) * a(2,2) - a(0,2) * a(2,0)) * d; + b(1,2) = (a(0,2) * a(1,0) - a(0,0) * a(1,2)) * d; + + b(2,0) = (a(1,0) * a(2,1) - a(1,1) * a(2,0)) * d; + b(2,1) = (a(0,1) * a(2,0) - a(0,0) * a(2,1)) * d; + b(2,2) = (a(0,0) * a(1,1) - a(0,1) * a(1,0)) * d; + return true; + } +}; + + +template struct Matx_FastSolveOp +{ + bool operator()(const Matx<_Tp, m, l>& a, const Matx<_Tp, m, n>& b, + Matx<_Tp, l, n>& x, int method) const + { + return cv::solve(a, b, x, method); + } +}; + +template struct Matx_FastSolveOp<_Tp, m, m, n> +{ + bool operator()(const Matx<_Tp, m, m>& a, const Matx<_Tp, m, n>& b, + Matx<_Tp, m, n>& x, int method) const + { + if (method == DECOMP_LU || method == DECOMP_CHOLESKY) + { + Matx<_Tp, m, m> temp = a; + x = b; + if( method == DECOMP_CHOLESKY ) + return Cholesky(temp.val, m*sizeof(_Tp), m, x.val, n*sizeof(_Tp), n); + + return LU(temp.val, m*sizeof(_Tp), m, x.val, n*sizeof(_Tp), n) != 0; + } + else + { + return cv::solve(a, b, x, method); + } + } +}; + +template struct Matx_FastSolveOp<_Tp, 2, 2, 1> +{ + bool operator()(const Matx<_Tp, 2, 2>& a, const Matx<_Tp, 2, 1>& b, + Matx<_Tp, 2, 1>& x, int) const + { + _Tp d = (_Tp)determinant(a); + if (d == 0) + return false; + d = 1/d; + x(0) = (b(0)*a(1,1) - b(1)*a(0,1))*d; + x(1) = (b(1)*a(0,0) - b(0)*a(1,0))*d; + return true; + } +}; + +template struct Matx_FastSolveOp<_Tp, 3, 3, 1> +{ + bool operator()(const Matx<_Tp, 3, 3>& a, const Matx<_Tp, 3, 1>& b, + Matx<_Tp, 3, 1>& x, int) const + { + _Tp d = (_Tp)determinant(a); + if (d == 0) + return false; + d = 1/d; + x(0) = d*(b(0)*(a(1,1)*a(2,2) - a(1,2)*a(2,1)) - + a(0,1)*(b(1)*a(2,2) - a(1,2)*b(2)) + + a(0,2)*(b(1)*a(2,1) - a(1,1)*b(2))); + + x(1) = d*(a(0,0)*(b(1)*a(2,2) - a(1,2)*b(2)) - + b(0)*(a(1,0)*a(2,2) - a(1,2)*a(2,0)) + + a(0,2)*(a(1,0)*b(2) - b(1)*a(2,0))); + + x(2) = d*(a(0,0)*(a(1,1)*b(2) - b(1)*a(2,1)) - + a(0,1)*(a(1,0)*b(2) - b(1)*a(2,0)) + + b(0)*(a(1,0)*a(2,1) - a(1,1)*a(2,0))); + return true; + } +}; + +} // internal + +template inline +Matx<_Tp,m,n> Matx<_Tp,m,n>::randu(_Tp a, _Tp b) +{ + Matx<_Tp,m,n> M; + cv::randu(M, Scalar(a), Scalar(b)); + return M; +} + +template inline +Matx<_Tp,m,n> Matx<_Tp,m,n>::randn(_Tp a, _Tp b) +{ + Matx<_Tp,m,n> M; + cv::randn(M, Scalar(a), Scalar(b)); + return M; +} + +template inline +Matx<_Tp, n, m> Matx<_Tp, m, n>::inv(int method, bool *p_is_ok /*= NULL*/) const +{ + Matx<_Tp, n, m> b; + bool ok = cv::internal::Matx_FastInvOp<_Tp, m, n>()(*this, b, method); + if (p_is_ok) *p_is_ok = ok; + return ok ? b : Matx<_Tp, n, m>::zeros(); +} + +template template inline +Matx<_Tp, n, l> Matx<_Tp, m, n>::solve(const Matx<_Tp, m, l>& rhs, int method) const +{ + Matx<_Tp, n, l> x; + bool ok = cv::internal::Matx_FastSolveOp<_Tp, m, n, l>()(*this, rhs, x, method); + return ok ? x : Matx<_Tp, n, l>::zeros(); +} + + + +////////////////////////// Augmenting algebraic & logical operations ////////////////////////// + +#define CV_MAT_AUG_OPERATOR1(op, cvop, A, B) \ + static inline A& operator op (A& a, const B& b) { cvop; return a; } + +#define CV_MAT_AUG_OPERATOR(op, cvop, A, B) \ + CV_MAT_AUG_OPERATOR1(op, cvop, A, B) \ + CV_MAT_AUG_OPERATOR1(op, cvop, const A, B) + +#define CV_MAT_AUG_OPERATOR_T(op, cvop, A, B) \ + template CV_MAT_AUG_OPERATOR1(op, cvop, A, B) \ + template CV_MAT_AUG_OPERATOR1(op, cvop, const A, B) + +#define CV_MAT_AUG_OPERATOR_TN(op, cvop, A) \ + template static inline A& operator op (A& a, const Matx<_Tp,m,n>& b) { cvop; return a; } \ + template static inline const A& operator op (const A& a, const Matx<_Tp,m,n>& b) { cvop; return a; } + +CV_MAT_AUG_OPERATOR (+=, cv::add(a, b, (const Mat&)a), Mat, Mat) +CV_MAT_AUG_OPERATOR (+=, cv::add(a, b, (const Mat&)a), Mat, Scalar) +CV_MAT_AUG_OPERATOR_T(+=, cv::add(a, b, (const Mat&)a), Mat_<_Tp>, Mat) +CV_MAT_AUG_OPERATOR_T(+=, cv::add(a, b, (const Mat&)a), Mat_<_Tp>, Scalar) +CV_MAT_AUG_OPERATOR_T(+=, cv::add(a, b, (const Mat&)a), Mat_<_Tp>, Mat_<_Tp>) +CV_MAT_AUG_OPERATOR_TN(+=, cv::add(a, Mat(b), (const Mat&)a), Mat) +CV_MAT_AUG_OPERATOR_TN(+=, cv::add(a, Mat(b), (const Mat&)a), Mat_<_Tp>) + +CV_MAT_AUG_OPERATOR (-=, cv::subtract(a, b, (const Mat&)a), Mat, Mat) +CV_MAT_AUG_OPERATOR (-=, cv::subtract(a, b, (const Mat&)a), Mat, Scalar) +CV_MAT_AUG_OPERATOR_T(-=, cv::subtract(a, b, (const Mat&)a), Mat_<_Tp>, Mat) +CV_MAT_AUG_OPERATOR_T(-=, cv::subtract(a, b, (const Mat&)a), Mat_<_Tp>, Scalar) +CV_MAT_AUG_OPERATOR_T(-=, cv::subtract(a, b, (const Mat&)a), Mat_<_Tp>, Mat_<_Tp>) +CV_MAT_AUG_OPERATOR_TN(-=, cv::subtract(a, Mat(b), (const Mat&)a), Mat) +CV_MAT_AUG_OPERATOR_TN(-=, cv::subtract(a, Mat(b), (const Mat&)a), Mat_<_Tp>) + +CV_MAT_AUG_OPERATOR (*=, cv::gemm(a, b, 1, Mat(), 0, a, 0), Mat, Mat) +CV_MAT_AUG_OPERATOR_T(*=, cv::gemm(a, b, 1, Mat(), 0, a, 0), Mat_<_Tp>, Mat) +CV_MAT_AUG_OPERATOR_T(*=, cv::gemm(a, b, 1, Mat(), 0, a, 0), Mat_<_Tp>, Mat_<_Tp>) +CV_MAT_AUG_OPERATOR (*=, a.convertTo(a, -1, b), Mat, double) +CV_MAT_AUG_OPERATOR_T(*=, a.convertTo(a, -1, b), Mat_<_Tp>, double) +CV_MAT_AUG_OPERATOR_TN(*=, cv::gemm(a, Mat(b), 1, Mat(), 0, a, 0), Mat) +CV_MAT_AUG_OPERATOR_TN(*=, cv::gemm(a, Mat(b), 1, Mat(), 0, a, 0), Mat_<_Tp>) + +CV_MAT_AUG_OPERATOR (/=, cv::divide(a, b, (const Mat&)a), Mat, Mat) +CV_MAT_AUG_OPERATOR_T(/=, cv::divide(a, b, (const Mat&)a), Mat_<_Tp>, Mat) +CV_MAT_AUG_OPERATOR_T(/=, cv::divide(a, b, (const Mat&)a), Mat_<_Tp>, Mat_<_Tp>) +CV_MAT_AUG_OPERATOR (/=, a.convertTo((Mat&)a, -1, 1./b), Mat, double) +CV_MAT_AUG_OPERATOR_T(/=, a.convertTo((Mat&)a, -1, 1./b), Mat_<_Tp>, double) +CV_MAT_AUG_OPERATOR_TN(/=, cv::divide(a, Mat(b), (const Mat&)a), Mat) +CV_MAT_AUG_OPERATOR_TN(/=, cv::divide(a, Mat(b), (const Mat&)a), Mat_<_Tp>) + +CV_MAT_AUG_OPERATOR (&=, cv::bitwise_and(a, b, (const Mat&)a), Mat, Mat) +CV_MAT_AUG_OPERATOR (&=, cv::bitwise_and(a, b, (const Mat&)a), Mat, Scalar) +CV_MAT_AUG_OPERATOR_T(&=, cv::bitwise_and(a, b, (const Mat&)a), Mat_<_Tp>, Mat) +CV_MAT_AUG_OPERATOR_T(&=, cv::bitwise_and(a, b, (const Mat&)a), Mat_<_Tp>, Scalar) +CV_MAT_AUG_OPERATOR_T(&=, cv::bitwise_and(a, b, (const Mat&)a), Mat_<_Tp>, Mat_<_Tp>) +CV_MAT_AUG_OPERATOR_TN(&=, cv::bitwise_and(a, Mat(b), (const Mat&)a), Mat) +CV_MAT_AUG_OPERATOR_TN(&=, cv::bitwise_and(a, Mat(b), (const Mat&)a), Mat_<_Tp>) + +CV_MAT_AUG_OPERATOR (|=, cv::bitwise_or(a, b, (const Mat&)a), Mat, Mat) +CV_MAT_AUG_OPERATOR (|=, cv::bitwise_or(a, b, (const Mat&)a), Mat, Scalar) +CV_MAT_AUG_OPERATOR_T(|=, cv::bitwise_or(a, b, (const Mat&)a), Mat_<_Tp>, Mat) +CV_MAT_AUG_OPERATOR_T(|=, cv::bitwise_or(a, b, (const Mat&)a), Mat_<_Tp>, Scalar) +CV_MAT_AUG_OPERATOR_T(|=, cv::bitwise_or(a, b, (const Mat&)a), Mat_<_Tp>, Mat_<_Tp>) +CV_MAT_AUG_OPERATOR_TN(|=, cv::bitwise_or(a, Mat(b), (const Mat&)a), Mat) +CV_MAT_AUG_OPERATOR_TN(|=, cv::bitwise_or(a, Mat(b), (const Mat&)a), Mat_<_Tp>) + +CV_MAT_AUG_OPERATOR (^=, cv::bitwise_xor(a, b, (const Mat&)a), Mat, Mat) +CV_MAT_AUG_OPERATOR (^=, cv::bitwise_xor(a, b, (const Mat&)a), Mat, Scalar) +CV_MAT_AUG_OPERATOR_T(^=, cv::bitwise_xor(a, b, (const Mat&)a), Mat_<_Tp>, Mat) +CV_MAT_AUG_OPERATOR_T(^=, cv::bitwise_xor(a, b, (const Mat&)a), Mat_<_Tp>, Scalar) +CV_MAT_AUG_OPERATOR_T(^=, cv::bitwise_xor(a, b, (const Mat&)a), Mat_<_Tp>, Mat_<_Tp>) +CV_MAT_AUG_OPERATOR_TN(^=, cv::bitwise_xor(a, Mat(b), (const Mat&)a), Mat) +CV_MAT_AUG_OPERATOR_TN(^=, cv::bitwise_xor(a, Mat(b), (const Mat&)a), Mat_<_Tp>) + +#undef CV_MAT_AUG_OPERATOR_TN +#undef CV_MAT_AUG_OPERATOR_T +#undef CV_MAT_AUG_OPERATOR +#undef CV_MAT_AUG_OPERATOR1 + + + +///////////////////////////////////////////// SVD ///////////////////////////////////////////// + +inline SVD::SVD() {} +inline SVD::SVD( InputArray m, int flags ) { operator ()(m, flags); } +inline void SVD::solveZ( InputArray m, OutputArray _dst ) +{ + Mat mtx = m.getMat(); + SVD svd(mtx, (mtx.rows >= mtx.cols ? 0 : SVD::FULL_UV)); + _dst.create(svd.vt.cols, 1, svd.vt.type()); + Mat dst = _dst.getMat(); + svd.vt.row(svd.vt.rows-1).reshape(1,svd.vt.cols).copyTo(dst); +} + +template inline void + SVD::compute( const Matx<_Tp, m, n>& a, Matx<_Tp, nm, 1>& w, Matx<_Tp, m, nm>& u, Matx<_Tp, n, nm>& vt ) +{ + CV_StaticAssert( nm == MIN(m, n), "Invalid size of output vector."); + Mat _a(a, false), _u(u, false), _w(w, false), _vt(vt, false); + SVD::compute(_a, _w, _u, _vt); + CV_Assert(_w.data == (uchar*)&w.val[0] && _u.data == (uchar*)&u.val[0] && _vt.data == (uchar*)&vt.val[0]); +} + +template inline void +SVD::compute( const Matx<_Tp, m, n>& a, Matx<_Tp, nm, 1>& w ) +{ + CV_StaticAssert( nm == MIN(m, n), "Invalid size of output vector."); + Mat _a(a, false), _w(w, false); + SVD::compute(_a, _w); + CV_Assert(_w.data == (uchar*)&w.val[0]); +} + +template inline void +SVD::backSubst( const Matx<_Tp, nm, 1>& w, const Matx<_Tp, m, nm>& u, + const Matx<_Tp, n, nm>& vt, const Matx<_Tp, m, nb>& rhs, + Matx<_Tp, n, nb>& dst ) +{ + CV_StaticAssert( nm == MIN(m, n), "Invalid size of output vector."); + Mat _u(u, false), _w(w, false), _vt(vt, false), _rhs(rhs, false), _dst(dst, false); + SVD::backSubst(_w, _u, _vt, _rhs, _dst); + CV_Assert(_dst.data == (uchar*)&dst.val[0]); +} + + + +/////////////////////////////////// Multiply-with-Carry RNG /////////////////////////////////// + +inline RNG::RNG() { state = 0xffffffff; } +inline RNG::RNG(uint64 _state) { state = _state ? _state : 0xffffffff; } + +inline RNG::operator uchar() { return (uchar)next(); } +inline RNG::operator schar() { return (schar)next(); } +inline RNG::operator ushort() { return (ushort)next(); } +inline RNG::operator short() { return (short)next(); } +inline RNG::operator int() { return (int)next(); } +inline RNG::operator unsigned() { return next(); } +inline RNG::operator float() { return next()*2.3283064365386962890625e-10f; } +inline RNG::operator double() { unsigned t = next(); return (((uint64)t << 32) | next()) * 5.4210108624275221700372640043497e-20; } + +inline unsigned RNG::operator ()(unsigned N) { return (unsigned)uniform(0,N); } +inline unsigned RNG::operator ()() { return next(); } + +inline int RNG::uniform(int a, int b) { return a == b ? a : (int)(next() % (b - a) + a); } +inline float RNG::uniform(float a, float b) { return ((float)*this)*(b - a) + a; } +inline double RNG::uniform(double a, double b) { return ((double)*this)*(b - a) + a; } + +inline bool RNG::operator ==(const RNG& other) const { return state == other.state; } + +inline unsigned RNG::next() +{ + state = (uint64)(unsigned)state* /*CV_RNG_COEFF*/ 4164903690U + (unsigned)(state >> 32); + return (unsigned)state; +} + +//! returns the next uniformly-distributed random number of the specified type +template static inline _Tp randu() +{ + return (_Tp)theRNG(); +} + +///////////////////////////////// Formatted string generation ///////////////////////////////// + +/** @brief Returns a text string formatted using the printf-like expression. + +The function acts like sprintf but forms and returns an STL string. It can be used to form an error +message in the Exception constructor. +@param fmt printf-compatible formatting specifiers. + +**Note**: +|Type|Specifier| +|-|-| +|`const char*`|`%s`| +|`char`|`%c`| +|`float` / `double`|`%f`,`%g`| +|`int`, `long`, `long long`|`%d`, `%ld`, ``%lld`| +|`unsigned`, `unsigned long`, `unsigned long long`|`%u`, `%lu`, `%llu`| +|`uint64` -> `uintmax_t`, `int64` -> `intmax_t`|`%ju`, `%jd`| +|`size_t`|`%zu`| + */ +CV_EXPORTS String format( const char* fmt, ... ) CV_FORMAT_PRINTF(1, 2); + +///////////////////////////////// Formatted output of cv::Mat ///////////////////////////////// + +static inline +Ptr format(InputArray mtx, Formatter::FormatType fmt) +{ + return Formatter::get(fmt)->format(mtx.getMat()); +} + +static inline +int print(Ptr fmtd, FILE* stream = stdout) +{ + int written = 0; + fmtd->reset(); + for(const char* str = fmtd->next(); str; str = fmtd->next()) + written += fputs(str, stream); + + return written; +} + +static inline +int print(const Mat& mtx, FILE* stream = stdout) +{ + return print(Formatter::get()->format(mtx), stream); +} + +static inline +int print(const UMat& mtx, FILE* stream = stdout) +{ + return print(Formatter::get()->format(mtx.getMat(ACCESS_READ)), stream); +} + +template static inline +int print(const std::vector >& vec, FILE* stream = stdout) +{ + return print(Formatter::get()->format(Mat(vec)), stream); +} + +template static inline +int print(const std::vector >& vec, FILE* stream = stdout) +{ + return print(Formatter::get()->format(Mat(vec)), stream); +} + +template static inline +int print(const Matx<_Tp, m, n>& matx, FILE* stream = stdout) +{ + return print(Formatter::get()->format(cv::Mat(matx)), stream); +} + +//! @endcond + +/****************************************************************************************\ +* Auxiliary algorithms * +\****************************************************************************************/ + +/** @brief Splits an element set into equivalency classes. + +The generic function partition implements an \f$O(N^2)\f$ algorithm for splitting a set of \f$N\f$ elements +into one or more equivalency classes, as described in + . The function returns the number of +equivalency classes. +@param _vec Set of elements stored as a vector. +@param labels Output vector of labels. It contains as many elements as vec. Each label labels[i] is +a 0-based cluster index of `vec[i]`. +@param predicate Equivalence predicate (pointer to a boolean function of two arguments or an +instance of the class that has the method bool operator()(const _Tp& a, const _Tp& b) ). The +predicate returns true when the elements are certainly in the same class, and returns false if they +may or may not be in the same class. +@ingroup core_cluster +*/ +template int +partition( const std::vector<_Tp>& _vec, std::vector& labels, + _EqPredicate predicate=_EqPredicate()) +{ + int i, j, N = (int)_vec.size(); + const _Tp* vec = &_vec[0]; + + const int PARENT=0; + const int RANK=1; + + std::vector _nodes(N*2); + int (*nodes)[2] = (int(*)[2])&_nodes[0]; + + // The first O(N) pass: create N single-vertex trees + for(i = 0; i < N; i++) + { + nodes[i][PARENT]=-1; + nodes[i][RANK] = 0; + } + + // The main O(N^2) pass: merge connected components + for( i = 0; i < N; i++ ) + { + int root = i; + + // find root + while( nodes[root][PARENT] >= 0 ) + root = nodes[root][PARENT]; + + for( j = 0; j < N; j++ ) + { + if( i == j || !predicate(vec[i], vec[j])) + continue; + int root2 = j; + + while( nodes[root2][PARENT] >= 0 ) + root2 = nodes[root2][PARENT]; + + if( root2 != root ) + { + // unite both trees + int rank = nodes[root][RANK], rank2 = nodes[root2][RANK]; + if( rank > rank2 ) + nodes[root2][PARENT] = root; + else + { + nodes[root][PARENT] = root2; + nodes[root2][RANK] += rank == rank2; + root = root2; + } + CV_Assert( nodes[root][PARENT] < 0 ); + + int k = j, parent; + + // compress the path from node2 to root + while( (parent = nodes[k][PARENT]) >= 0 ) + { + nodes[k][PARENT] = root; + k = parent; + } + + // compress the path from node to root + k = i; + while( (parent = nodes[k][PARENT]) >= 0 ) + { + nodes[k][PARENT] = root; + k = parent; + } + } + } + } + + // Final O(N) pass: enumerate classes + labels.resize(N); + int nclasses = 0; + + for( i = 0; i < N; i++ ) + { + int root = i; + while( nodes[root][PARENT] >= 0 ) + root = nodes[root][PARENT]; + // re-use the rank as the class label + if( nodes[root][RANK] >= 0 ) + nodes[root][RANK] = ~nclasses++; + labels[i] = ~nodes[root][RANK]; + } + + return nclasses; +} + +} // cv + +#endif diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/optim.hpp b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/optim.hpp new file mode 100644 index 0000000..f61a2b9 --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/optim.hpp @@ -0,0 +1,302 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2013, OpenCV Foundation, all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the OpenCV Foundation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef OPENCV_OPTIM_HPP +#define OPENCV_OPTIM_HPP + +#include "opencv2/core.hpp" + +namespace cv +{ + +/** @addtogroup core_optim +The algorithms in this section minimize or maximize function value within specified constraints or +without any constraints. +@{ +*/ + +/** @brief Basic interface for all solvers + */ +class CV_EXPORTS MinProblemSolver : public Algorithm +{ +public: + /** @brief Represents function being optimized + */ + class CV_EXPORTS Function + { + public: + virtual ~Function() {} + virtual int getDims() const = 0; + virtual double getGradientEps() const; + virtual double calc(const double* x) const = 0; + virtual void getGradient(const double* x,double* grad); + }; + + /** @brief Getter for the optimized function. + + The optimized function is represented by Function interface, which requires derivatives to + implement the calc(double*) and getDim() methods to evaluate the function. + + @return Smart-pointer to an object that implements Function interface - it represents the + function that is being optimized. It can be empty, if no function was given so far. + */ + virtual Ptr getFunction() const = 0; + + /** @brief Setter for the optimized function. + + *It should be called at least once before the call to* minimize(), as default value is not usable. + + @param f The new function to optimize. + */ + virtual void setFunction(const Ptr& f) = 0; + + /** @brief Getter for the previously set terminal criteria for this algorithm. + + @return Deep copy of the terminal criteria used at the moment. + */ + virtual TermCriteria getTermCriteria() const = 0; + + /** @brief Set terminal criteria for solver. + + This method *is not necessary* to be called before the first call to minimize(), as the default + value is sensible. + + Algorithm stops when the number of function evaluations done exceeds termcrit.maxCount, when + the function values at the vertices of simplex are within termcrit.epsilon range or simplex + becomes so small that it can enclosed in a box with termcrit.epsilon sides, whatever comes + first. + @param termcrit Terminal criteria to be used, represented as cv::TermCriteria structure. + */ + virtual void setTermCriteria(const TermCriteria& termcrit) = 0; + + /** @brief actually runs the algorithm and performs the minimization. + + The sole input parameter determines the centroid of the starting simplex (roughly, it tells + where to start), all the others (terminal criteria, initial step, function to be minimized) are + supposed to be set via the setters before the call to this method or the default values (not + always sensible) will be used. + + @param x The initial point, that will become a centroid of an initial simplex. After the algorithm + will terminate, it will be set to the point where the algorithm stops, the point of possible + minimum. + @return The value of a function at the point found. + */ + virtual double minimize(InputOutputArray x) = 0; +}; + +/** @brief This class is used to perform the non-linear non-constrained minimization of a function, + +defined on an `n`-dimensional Euclidean space, using the **Nelder-Mead method**, also known as +**downhill simplex method**. The basic idea about the method can be obtained from +. + +It should be noted, that this method, although deterministic, is rather a heuristic and therefore +may converge to a local minima, not necessary a global one. It is iterative optimization technique, +which at each step uses an information about the values of a function evaluated only at `n+1` +points, arranged as a *simplex* in `n`-dimensional space (hence the second name of the method). At +each step new point is chosen to evaluate function at, obtained value is compared with previous +ones and based on this information simplex changes it's shape , slowly moving to the local minimum. +Thus this method is using *only* function values to make decision, on contrary to, say, Nonlinear +Conjugate Gradient method (which is also implemented in optim). + +Algorithm stops when the number of function evaluations done exceeds termcrit.maxCount, when the +function values at the vertices of simplex are within termcrit.epsilon range or simplex becomes so +small that it can enclosed in a box with termcrit.epsilon sides, whatever comes first, for some +defined by user positive integer termcrit.maxCount and positive non-integer termcrit.epsilon. + +@note DownhillSolver is a derivative of the abstract interface +cv::MinProblemSolver, which in turn is derived from the Algorithm interface and is used to +encapsulate the functionality, common to all non-linear optimization algorithms in the optim +module. + +@note term criteria should meet following condition: +@code + termcrit.type == (TermCriteria::MAX_ITER + TermCriteria::EPS) && termcrit.epsilon > 0 && termcrit.maxCount > 0 +@endcode + */ +class CV_EXPORTS DownhillSolver : public MinProblemSolver +{ +public: + /** @brief Returns the initial step that will be used in downhill simplex algorithm. + + @param step Initial step that will be used in algorithm. Note, that although corresponding setter + accepts column-vectors as well as row-vectors, this method will return a row-vector. + @see DownhillSolver::setInitStep + */ + virtual void getInitStep(OutputArray step) const=0; + + /** @brief Sets the initial step that will be used in downhill simplex algorithm. + + Step, together with initial point (given in DownhillSolver::minimize) are two `n`-dimensional + vectors that are used to determine the shape of initial simplex. Roughly said, initial point + determines the position of a simplex (it will become simplex's centroid), while step determines the + spread (size in each dimension) of a simplex. To be more precise, if \f$s,x_0\in\mathbb{R}^n\f$ are + the initial step and initial point respectively, the vertices of a simplex will be: + \f$v_0:=x_0-\frac{1}{2} s\f$ and \f$v_i:=x_0+s_i\f$ for \f$i=1,2,\dots,n\f$ where \f$s_i\f$ denotes + projections of the initial step of *n*-th coordinate (the result of projection is treated to be + vector given by \f$s_i:=e_i\cdot\left\f$, where \f$e_i\f$ form canonical basis) + + @param step Initial step that will be used in algorithm. Roughly said, it determines the spread + (size in each dimension) of an initial simplex. + */ + virtual void setInitStep(InputArray step)=0; + + /** @brief This function returns the reference to the ready-to-use DownhillSolver object. + + All the parameters are optional, so this procedure can be called even without parameters at + all. In this case, the default values will be used. As default value for terminal criteria are + the only sensible ones, MinProblemSolver::setFunction() and DownhillSolver::setInitStep() + should be called upon the obtained object, if the respective parameters were not given to + create(). Otherwise, the two ways (give parameters to createDownhillSolver() or miss them out + and call the MinProblemSolver::setFunction() and DownhillSolver::setInitStep()) are absolutely + equivalent (and will drop the same errors in the same way, should invalid input be detected). + @param f Pointer to the function that will be minimized, similarly to the one you submit via + MinProblemSolver::setFunction. + @param initStep Initial step, that will be used to construct the initial simplex, similarly to the one + you submit via MinProblemSolver::setInitStep. + @param termcrit Terminal criteria to the algorithm, similarly to the one you submit via + MinProblemSolver::setTermCriteria. + */ + static Ptr create(const Ptr& f=Ptr(), + InputArray initStep=Mat_(1,1,0.0), + TermCriteria termcrit=TermCriteria(TermCriteria::MAX_ITER+TermCriteria::EPS,5000,0.000001)); +}; + +/** @brief This class is used to perform the non-linear non-constrained minimization of a function +with known gradient, + +defined on an *n*-dimensional Euclidean space, using the **Nonlinear Conjugate Gradient method**. +The implementation was done based on the beautifully clear explanatory article [An Introduction to +the Conjugate Gradient Method Without the Agonizing +Pain](http://www.cs.cmu.edu/~quake-papers/painless-conjugate-gradient.pdf) by Jonathan Richard +Shewchuk. The method can be seen as an adaptation of a standard Conjugate Gradient method (see, for +example ) for numerically solving the +systems of linear equations. + +It should be noted, that this method, although deterministic, is rather a heuristic method and +therefore may converge to a local minima, not necessary a global one. What is even more disastrous, +most of its behaviour is ruled by gradient, therefore it essentially cannot distinguish between +local minima and maxima. Therefore, if it starts sufficiently near to the local maximum, it may +converge to it. Another obvious restriction is that it should be possible to compute the gradient of +a function at any point, thus it is preferable to have analytic expression for gradient and +computational burden should be born by the user. + +The latter responsibility is accomplished via the getGradient method of a +MinProblemSolver::Function interface (which represents function being optimized). This method takes +point a point in *n*-dimensional space (first argument represents the array of coordinates of that +point) and compute its gradient (it should be stored in the second argument as an array). + +@note class ConjGradSolver thus does not add any new methods to the basic MinProblemSolver interface. + +@note term criteria should meet following condition: +@code + termcrit.type == (TermCriteria::MAX_ITER + TermCriteria::EPS) && termcrit.epsilon > 0 && termcrit.maxCount > 0 + // or + termcrit.type == TermCriteria::MAX_ITER) && termcrit.maxCount > 0 +@endcode + */ +class CV_EXPORTS ConjGradSolver : public MinProblemSolver +{ +public: + /** @brief This function returns the reference to the ready-to-use ConjGradSolver object. + + All the parameters are optional, so this procedure can be called even without parameters at + all. In this case, the default values will be used. As default value for terminal criteria are + the only sensible ones, MinProblemSolver::setFunction() should be called upon the obtained + object, if the function was not given to create(). Otherwise, the two ways (submit it to + create() or miss it out and call the MinProblemSolver::setFunction()) are absolutely equivalent + (and will drop the same errors in the same way, should invalid input be detected). + @param f Pointer to the function that will be minimized, similarly to the one you submit via + MinProblemSolver::setFunction. + @param termcrit Terminal criteria to the algorithm, similarly to the one you submit via + MinProblemSolver::setTermCriteria. + */ + static Ptr create(const Ptr& f=Ptr(), + TermCriteria termcrit=TermCriteria(TermCriteria::MAX_ITER+TermCriteria::EPS,5000,0.000001)); +}; + +//! return codes for cv::solveLP() function +enum SolveLPResult +{ + SOLVELP_UNBOUNDED = -2, //!< problem is unbounded (target function can achieve arbitrary high values) + SOLVELP_UNFEASIBLE = -1, //!< problem is unfeasible (there are no points that satisfy all the constraints imposed) + SOLVELP_SINGLE = 0, //!< there is only one maximum for target function + SOLVELP_MULTI = 1 //!< there are multiple maxima for target function - the arbitrary one is returned +}; + +/** @brief Solve given (non-integer) linear programming problem using the Simplex Algorithm (Simplex Method). + +What we mean here by "linear programming problem" (or LP problem, for short) can be formulated as: + +\f[\mbox{Maximize } c\cdot x\\ + \mbox{Subject to:}\\ + Ax\leq b\\ + x\geq 0\f] + +Where \f$c\f$ is fixed `1`-by-`n` row-vector, \f$A\f$ is fixed `m`-by-`n` matrix, \f$b\f$ is fixed `m`-by-`1` +column vector and \f$x\f$ is an arbitrary `n`-by-`1` column vector, which satisfies the constraints. + +Simplex algorithm is one of many algorithms that are designed to handle this sort of problems +efficiently. Although it is not optimal in theoretical sense (there exist algorithms that can solve +any problem written as above in polynomial time, while simplex method degenerates to exponential +time for some special cases), it is well-studied, easy to implement and is shown to work well for +real-life purposes. + +The particular implementation is taken almost verbatim from **Introduction to Algorithms, third +edition** by T. H. Cormen, C. E. Leiserson, R. L. Rivest and Clifford Stein. In particular, the +Bland's rule is used to prevent cycling. + +@param Func This row-vector corresponds to \f$c\f$ in the LP problem formulation (see above). It should +contain 32- or 64-bit floating point numbers. As a convenience, column-vector may be also submitted, +in the latter case it is understood to correspond to \f$c^T\f$. +@param Constr `m`-by-`n+1` matrix, whose rightmost column corresponds to \f$b\f$ in formulation above +and the remaining to \f$A\f$. It should contain 32- or 64-bit floating point numbers. +@param z The solution will be returned here as a column-vector - it corresponds to \f$c\f$ in the +formulation above. It will contain 64-bit floating point numbers. +@return One of cv::SolveLPResult + */ +CV_EXPORTS_W int solveLP(InputArray Func, InputArray Constr, OutputArray z); + +//! @} + +}// cv + +#endif diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/ovx.hpp b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/ovx.hpp new file mode 100644 index 0000000..8bb7d54 --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/ovx.hpp @@ -0,0 +1,28 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +// Copyright (C) 2016, Intel Corporation, all rights reserved. +// Third party copyrights are property of their respective owners. + +// OpenVX related definitions and declarations + +#pragma once +#ifndef OPENCV_OVX_HPP +#define OPENCV_OVX_HPP + +#include "cvdef.h" + +namespace cv +{ +/// Check if use of OpenVX is possible +CV_EXPORTS_W bool haveOpenVX(); + +/// Check if use of OpenVX is enabled +CV_EXPORTS_W bool useOpenVX(); + +/// Enable/disable use of OpenVX +CV_EXPORTS_W void setUseOpenVX(bool flag); +} // namespace cv + +#endif // OPENCV_OVX_HPP diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/persistence.hpp b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/persistence.hpp new file mode 100644 index 0000000..276f640 --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/persistence.hpp @@ -0,0 +1,1350 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Copyright (C) 2013, OpenCV Foundation, all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef OPENCV_CORE_PERSISTENCE_HPP +#define OPENCV_CORE_PERSISTENCE_HPP + +#ifndef CV_DOXYGEN +/// Define to support persistence legacy formats +#define CV__LEGACY_PERSISTENCE +#endif + +#ifndef __cplusplus +# error persistence.hpp header must be compiled as C++ +#endif + +//! @addtogroup core_c +//! @{ + +/** @brief "black box" representation of the file storage associated with a file on disk. + +Several functions that are described below take CvFileStorage\* as inputs and allow the user to +save or to load hierarchical collections that consist of scalar values, standard CXCore objects +(such as matrices, sequences, graphs), and user-defined objects. + +OpenCV can read and write data in XML (), YAML () or +JSON () formats. Below is an example of 3x3 floating-point identity matrix A, +stored in XML and YAML files +using CXCore functions: +XML: +@code{.xml} + + + + 3 + 3 +
f
+ 1. 0. 0. 0. 1. 0. 0. 0. 1. +
+
+@endcode +YAML: +@code{.yaml} + %YAML:1.0 + A: !!opencv-matrix + rows: 3 + cols: 3 + dt: f + data: [ 1., 0., 0., 0., 1., 0., 0., 0., 1.] +@endcode +As it can be seen from the examples, XML uses nested tags to represent hierarchy, while YAML uses +indentation for that purpose (similar to the Python programming language). + +The same functions can read and write data in both formats; the particular format is determined by +the extension of the opened file, ".xml" for XML files, ".yml" or ".yaml" for YAML and ".json" for +JSON. + */ + +//! @} core_c + +#include "opencv2/core/types.hpp" +#include "opencv2/core/mat.hpp" + +namespace cv { + +/** @addtogroup core_xml + +XML/YAML/JSON file storages. {#xml_storage} +======================= +Writing to a file storage. +-------------------------- +You can store and then restore various OpenCV data structures to/from XML (), +YAML () or JSON () formats. Also, it is possible to store +and load arbitrarily complex data structures, which include OpenCV data structures, as well as +primitive data types (integer and floating-point numbers and text strings) as their elements. + +Use the following procedure to write something to XML, YAML or JSON: +-# Create new FileStorage and open it for writing. It can be done with a single call to +FileStorage::FileStorage constructor that takes a filename, or you can use the default constructor +and then call FileStorage::open. Format of the file (XML, YAML or JSON) is determined from the filename +extension (".xml", ".yml"/".yaml" and ".json", respectively) +-# Write all the data you want using the streaming operator `<<`, just like in the case of STL +streams. +-# Close the file using FileStorage::release. FileStorage destructor also closes the file. + +Here is an example: +@code + #include "opencv2/core.hpp" + #include + + using namespace cv; + + int main(int, char** argv) + { + FileStorage fs("test.yml", FileStorage::WRITE); + + fs << "frameCount" << 5; + time_t rawtime; time(&rawtime); + fs << "calibrationDate" << asctime(localtime(&rawtime)); + Mat cameraMatrix = (Mat_(3,3) << 1000, 0, 320, 0, 1000, 240, 0, 0, 1); + Mat distCoeffs = (Mat_(5,1) << 0.1, 0.01, -0.001, 0, 0); + fs << "cameraMatrix" << cameraMatrix << "distCoeffs" << distCoeffs; + fs << "features" << "["; + for( int i = 0; i < 3; i++ ) + { + int x = rand() % 640; + int y = rand() % 480; + uchar lbp = rand() % 256; + + fs << "{:" << "x" << x << "y" << y << "lbp" << "[:"; + for( int j = 0; j < 8; j++ ) + fs << ((lbp >> j) & 1); + fs << "]" << "}"; + } + fs << "]"; + fs.release(); + return 0; + } +@endcode +The sample above stores to YML an integer, a text string (calibration date), 2 matrices, and a custom +structure "feature", which includes feature coordinates and LBP (local binary pattern) value. Here +is output of the sample: +@code{.yaml} +%YAML:1.0 +frameCount: 5 +calibrationDate: "Fri Jun 17 14:09:29 2011\n" +cameraMatrix: !!opencv-matrix + rows: 3 + cols: 3 + dt: d + data: [ 1000., 0., 320., 0., 1000., 240., 0., 0., 1. ] +distCoeffs: !!opencv-matrix + rows: 5 + cols: 1 + dt: d + data: [ 1.0000000000000001e-01, 1.0000000000000000e-02, + -1.0000000000000000e-03, 0., 0. ] +features: + - { x:167, y:49, lbp:[ 1, 0, 0, 1, 1, 0, 1, 1 ] } + - { x:298, y:130, lbp:[ 0, 0, 0, 1, 0, 0, 1, 1 ] } + - { x:344, y:158, lbp:[ 1, 1, 0, 0, 0, 0, 1, 0 ] } +@endcode + +As an exercise, you can replace ".yml" with ".xml" or ".json" in the sample above and see, how the +corresponding XML file will look like. + +Several things can be noted by looking at the sample code and the output: + +- The produced YAML (and XML/JSON) consists of heterogeneous collections that can be nested. There are + 2 types of collections: named collections (mappings) and unnamed collections (sequences). In mappings + each element has a name and is accessed by name. This is similar to structures and std::map in + C/C++ and dictionaries in Python. In sequences elements do not have names, they are accessed by + indices. This is similar to arrays and std::vector in C/C++ and lists, tuples in Python. + "Heterogeneous" means that elements of each single collection can have different types. + + Top-level collection in YAML/XML/JSON is a mapping. Each matrix is stored as a mapping, and the matrix + elements are stored as a sequence. Then, there is a sequence of features, where each feature is + represented a mapping, and lbp value in a nested sequence. + +- When you write to a mapping (a structure), you write element name followed by its value. When you + write to a sequence, you simply write the elements one by one. OpenCV data structures (such as + cv::Mat) are written in absolutely the same way as simple C data structures - using `<<` + operator. + +- To write a mapping, you first write the special string `{` to the storage, then write the + elements as pairs (`fs << << `) and then write the closing + `}`. + +- To write a sequence, you first write the special string `[`, then write the elements, then + write the closing `]`. + +- In YAML/JSON (but not XML), mappings and sequences can be written in a compact Python-like inline + form. In the sample above matrix elements, as well as each feature, including its lbp value, is + stored in such inline form. To store a mapping/sequence in a compact form, put `:` after the + opening character, e.g. use `{:` instead of `{` and `[:` instead of `[`. When the + data is written to XML, those extra `:` are ignored. + +Reading data from a file storage. +--------------------------------- +To read the previously written XML, YAML or JSON file, do the following: +-# Open the file storage using FileStorage::FileStorage constructor or FileStorage::open method. + In the current implementation the whole file is parsed and the whole representation of file + storage is built in memory as a hierarchy of file nodes (see FileNode) + +-# Read the data you are interested in. Use FileStorage::operator [], FileNode::operator [] + and/or FileNodeIterator. + +-# Close the storage using FileStorage::release. + +Here is how to read the file created by the code sample above: +@code + FileStorage fs2("test.yml", FileStorage::READ); + + // first method: use (type) operator on FileNode. + int frameCount = (int)fs2["frameCount"]; + + String date; + // second method: use FileNode::operator >> + fs2["calibrationDate"] >> date; + + Mat cameraMatrix2, distCoeffs2; + fs2["cameraMatrix"] >> cameraMatrix2; + fs2["distCoeffs"] >> distCoeffs2; + + cout << "frameCount: " << frameCount << endl + << "calibration date: " << date << endl + << "camera matrix: " << cameraMatrix2 << endl + << "distortion coeffs: " << distCoeffs2 << endl; + + FileNode features = fs2["features"]; + FileNodeIterator it = features.begin(), it_end = features.end(); + int idx = 0; + std::vector lbpval; + + // iterate through a sequence using FileNodeIterator + for( ; it != it_end; ++it, idx++ ) + { + cout << "feature #" << idx << ": "; + cout << "x=" << (int)(*it)["x"] << ", y=" << (int)(*it)["y"] << ", lbp: ("; + // you can also easily read numerical arrays using FileNode >> std::vector operator. + (*it)["lbp"] >> lbpval; + for( int i = 0; i < (int)lbpval.size(); i++ ) + cout << " " << (int)lbpval[i]; + cout << ")" << endl; + } + fs2.release(); +@endcode + +Format specification {#format_spec} +-------------------- +`([count]{u|c|w|s|i|f|d})`... where the characters correspond to fundamental C++ types: +- `u` 8-bit unsigned number +- `c` 8-bit signed number +- `w` 16-bit unsigned number +- `s` 16-bit signed number +- `i` 32-bit signed number +- `f` single precision floating-point number +- `d` double precision floating-point number +- `r` pointer, 32 lower bits of which are written as a signed integer. The type can be used to + store structures with links between the elements. + +`count` is the optional counter of values of a given type. For example, `2if` means that each array +element is a structure of 2 integers, followed by a single-precision floating-point number. The +equivalent notations of the above specification are `iif`, `2i1f` and so forth. Other examples: `u` +means that the array consists of bytes, and `2d` means the array consists of pairs of doubles. + +@see @ref samples/cpp/filestorage.cpp +*/ + +//! @{ + +/** @example samples/cpp/filestorage.cpp +A complete example using the FileStorage interface +*/ + +////////////////////////// XML & YAML I/O ////////////////////////// + +class CV_EXPORTS FileNode; +class CV_EXPORTS FileNodeIterator; + +/** @brief XML/YAML/JSON file storage class that encapsulates all the information necessary for writing or +reading data to/from a file. + */ +class CV_EXPORTS_W FileStorage +{ +public: + //! file storage mode + enum Mode + { + READ = 0, //!< value, open the file for reading + WRITE = 1, //!< value, open the file for writing + APPEND = 2, //!< value, open the file for appending + MEMORY = 4, //!< flag, read data from source or write data to the internal buffer (which is + //!< returned by FileStorage::release) + FORMAT_MASK = (7<<3), //!< mask for format flags + FORMAT_AUTO = 0, //!< flag, auto format + FORMAT_XML = (1<<3), //!< flag, XML format + FORMAT_YAML = (2<<3), //!< flag, YAML format + FORMAT_JSON = (3<<3), //!< flag, JSON format + + BASE64 = 64, //!< flag, write rawdata in Base64 by default. (consider using WRITE_BASE64) + WRITE_BASE64 = BASE64 | WRITE, //!< flag, enable both WRITE and BASE64 + }; + enum State + { + UNDEFINED = 0, + VALUE_EXPECTED = 1, + NAME_EXPECTED = 2, + INSIDE_MAP = 4 + }; + + /** @brief The constructors. + + The full constructor opens the file. Alternatively you can use the default constructor and then + call FileStorage::open. + */ + CV_WRAP FileStorage(); + + /** @overload + @copydoc open() + */ + CV_WRAP FileStorage(const String& filename, int flags, const String& encoding=String()); + + //! the destructor. calls release() + virtual ~FileStorage(); + + /** @brief Opens a file. + + See description of parameters in FileStorage::FileStorage. The method calls FileStorage::release + before opening the file. + @param filename Name of the file to open or the text string to read the data from. + Extension of the file (.xml, .yml/.yaml or .json) determines its format (XML, YAML or JSON + respectively). Also you can append .gz to work with compressed files, for example myHugeMatrix.xml.gz. If both + FileStorage::WRITE and FileStorage::MEMORY flags are specified, source is used just to specify + the output file format (e.g. mydata.xml, .yml etc.). A file name can also contain parameters. + You can use this format, "*?base64" (e.g. "file.json?base64" (case sensitive)), as an alternative to + FileStorage::BASE64 flag. + @param flags Mode of operation. One of FileStorage::Mode + @param encoding Encoding of the file. Note that UTF-16 XML encoding is not supported currently and + you should use 8-bit encoding instead of it. + */ + CV_WRAP virtual bool open(const String& filename, int flags, const String& encoding=String()); + + /** @brief Checks whether the file is opened. + + @returns true if the object is associated with the current file and false otherwise. It is a + good practice to call this method after you tried to open a file. + */ + CV_WRAP virtual bool isOpened() const; + + /** @brief Closes the file and releases all the memory buffers. + + Call this method after all I/O operations with the storage are finished. + */ + CV_WRAP virtual void release(); + + /** @brief Closes the file and releases all the memory buffers. + + Call this method after all I/O operations with the storage are finished. If the storage was + opened for writing data and FileStorage::WRITE was specified + */ + CV_WRAP virtual String releaseAndGetString(); + + /** @brief Returns the first element of the top-level mapping. + @returns The first element of the top-level mapping. + */ + CV_WRAP FileNode getFirstTopLevelNode() const; + + /** @brief Returns the top-level mapping + @param streamidx Zero-based index of the stream. In most cases there is only one stream in the file. + However, YAML supports multiple streams and so there can be several. + @returns The top-level mapping. + */ + CV_WRAP FileNode root(int streamidx=0) const; + + /** @brief Returns the specified element of the top-level mapping. + @param nodename Name of the file node. + @returns Node with the given name. + */ + FileNode operator[](const String& nodename) const; + + /** @overload */ + CV_WRAP_AS(getNode) FileNode operator[](const char* nodename) const; + + /** + * @brief Simplified writing API to use with bindings. + * @param name Name of the written object. When writing to sequences (a.k.a. "arrays"), pass an empty string. + * @param val Value of the written object. + */ + CV_WRAP void write(const String& name, int val); + /// @overload + CV_WRAP void write(const String& name, double val); + /// @overload + CV_WRAP void write(const String& name, const String& val); + /// @overload + CV_WRAP void write(const String& name, const Mat& val); + /// @overload + CV_WRAP void write(const String& name, const std::vector& val); + + /** @brief Writes multiple numbers. + + Writes one or more numbers of the specified format to the currently written structure. Usually it is + more convenient to use operator `<<` instead of this method. + @param fmt Specification of each array element, see @ref format_spec "format specification" + @param vec Pointer to the written array. + @param len Number of the uchar elements to write. + */ + void writeRaw( const String& fmt, const void* vec, size_t len ); + + /** @brief Writes a comment. + + The function writes a comment into file storage. The comments are skipped when the storage is read. + @param comment The written comment, single-line or multi-line + @param append If true, the function tries to put the comment at the end of current line. + Else if the comment is multi-line, or if it does not fit at the end of the current + line, the comment starts a new line. + */ + CV_WRAP void writeComment(const String& comment, bool append = false); + + /** @brief Starts to write a nested structure (sequence or a mapping). + @param name name of the structure. When writing to sequences (a.k.a. "arrays"), pass an empty string. + @param flags type of the structure (FileNode::MAP or FileNode::SEQ (both with optional FileNode::FLOW)). + @param typeName optional name of the type you store. The effect of setting this depends on the storage format. + I.e. if the format has a specification for storing type information, this parameter is used. + */ + CV_WRAP void startWriteStruct(const String& name, int flags, const String& typeName=String()); + + /** @brief Finishes writing nested structure (should pair startWriteStruct()) + */ + CV_WRAP void endWriteStruct(); + + /** @brief Returns the normalized object name for the specified name of a file. + @param filename Name of a file + @returns The normalized object name. + */ + static String getDefaultObjectName(const String& filename); + + /** @brief Returns the current format. + * @returns The current format, see FileStorage::Mode + */ + CV_WRAP int getFormat() const; + + int state; + std::string elname; + + class Impl; + Ptr p; +}; + +/** @brief File Storage Node class. + +The node is used to store each and every element of the file storage opened for reading. When +XML/YAML file is read, it is first parsed and stored in the memory as a hierarchical collection of +nodes. Each node can be a "leaf" that is contain a single number or a string, or be a collection of +other nodes. There can be named collections (mappings) where each element has a name and it is +accessed by a name, and ordered collections (sequences) where elements do not have names but rather +accessed by index. Type of the file node can be determined using FileNode::type method. + +Note that file nodes are only used for navigating file storages opened for reading. When a file +storage is opened for writing, no data is stored in memory after it is written. + */ +class CV_EXPORTS_W_SIMPLE FileNode +{ +public: + //! type of the file storage node + enum + { + NONE = 0, //!< empty node + INT = 1, //!< an integer + REAL = 2, //!< floating-point number + FLOAT = REAL, //!< synonym or REAL + STR = 3, //!< text string in UTF-8 encoding + STRING = STR, //!< synonym for STR + SEQ = 4, //!< sequence + MAP = 5, //!< mapping + TYPE_MASK = 7, + + FLOW = 8, //!< compact representation of a sequence or mapping. Used only by YAML writer + UNIFORM = 8, //!< if set, means that all the collection elements are numbers of the same type (real's or int's). + //!< UNIFORM is used only when reading FileStorage; FLOW is used only when writing. So they share the same bit + EMPTY = 16, //!< empty structure (sequence or mapping) + NAMED = 32 //!< the node has a name (i.e. it is element of a mapping). + }; + /** @brief The constructors. + + These constructors are used to create a default file node, construct it from obsolete structures or + from the another file node. + */ + CV_WRAP FileNode(); + + /** @overload + @param fs Pointer to the file storage structure. + @param blockIdx Index of the memory block where the file node is stored + @param ofs Offset in bytes from the beginning of the serialized storage + + @deprecated + */ + FileNode(const FileStorage* fs, size_t blockIdx, size_t ofs); + + /** @overload + @param node File node to be used as initialization for the created file node. + */ + FileNode(const FileNode& node); + + FileNode& operator=(const FileNode& node); + + /** @brief Returns element of a mapping node or a sequence node. + @param nodename Name of an element in the mapping node. + @returns Returns the element with the given identifier. + */ + FileNode operator[](const String& nodename) const; + + /** @overload + @param nodename Name of an element in the mapping node. + */ + CV_WRAP_AS(getNode) FileNode operator[](const char* nodename) const; + + /** @overload + @param i Index of an element in the sequence node. + */ + CV_WRAP_AS(at) FileNode operator[](int i) const; + + /** @brief Returns keys of a mapping node. + @returns Keys of a mapping node. + */ + CV_WRAP std::vector keys() const; + + /** @brief Returns type of the node. + @returns Type of the node. See FileNode::Type + */ + CV_WRAP int type() const; + + //! returns true if the node is empty + CV_WRAP bool empty() const; + //! returns true if the node is a "none" object + CV_WRAP bool isNone() const; + //! returns true if the node is a sequence + CV_WRAP bool isSeq() const; + //! returns true if the node is a mapping + CV_WRAP bool isMap() const; + //! returns true if the node is an integer + CV_WRAP bool isInt() const; + //! returns true if the node is a floating-point number + CV_WRAP bool isReal() const; + //! returns true if the node is a text string + CV_WRAP bool isString() const; + //! returns true if the node has a name + CV_WRAP bool isNamed() const; + //! returns the node name or an empty string if the node is nameless + CV_WRAP std::string name() const; + //! returns the number of elements in the node, if it is a sequence or mapping, or 1 otherwise. + CV_WRAP size_t size() const; + //! returns raw size of the FileNode in bytes + CV_WRAP size_t rawSize() const; + //! returns the node content as an integer. If the node stores floating-point number, it is rounded. + operator int() const; + //! returns the node content as float + operator float() const; + //! returns the node content as double + operator double() const; + //! returns the node content as text string + inline operator std::string() const { return this->string(); } + + static bool isMap(int flags); + static bool isSeq(int flags); + static bool isCollection(int flags); + static bool isEmptyCollection(int flags); + static bool isFlow(int flags); + + uchar* ptr(); + const uchar* ptr() const; + + //! returns iterator pointing to the first node element + FileNodeIterator begin() const; + //! returns iterator pointing to the element following the last node element + FileNodeIterator end() const; + + /** @brief Reads node elements to the buffer with the specified format. + + Usually it is more convenient to use operator `>>` instead of this method. + @param fmt Specification of each array element. See @ref format_spec "format specification" + @param vec Pointer to the destination array. + @param len Number of bytes to read (buffer size limit). If it is greater than number of + remaining elements then all of them will be read. + */ + void readRaw( const String& fmt, void* vec, size_t len ) const; + + /** Internal method used when reading FileStorage. + Sets the type (int, real or string) and value of the previously created node. + */ + void setValue( int type, const void* value, int len=-1 ); + + //! Simplified reading API to use with bindings. + CV_WRAP double real() const; + //! Simplified reading API to use with bindings. + CV_WRAP std::string string() const; + //! Simplified reading API to use with bindings. + CV_WRAP Mat mat() const; + + //protected: + FileNode(FileStorage::Impl* fs, size_t blockIdx, size_t ofs); + + FileStorage::Impl* fs; + size_t blockIdx; + size_t ofs; +}; + + +/** @brief used to iterate through sequences and mappings. + + A standard STL notation, with node.begin(), node.end() denoting the beginning and the end of a + sequence, stored in node. See the data reading sample in the beginning of the section. + */ +class CV_EXPORTS FileNodeIterator +{ +public: + /** @brief The constructors. + + These constructors are used to create a default iterator, set it to specific element in a file node + or construct it from another iterator. + */ + FileNodeIterator(); + + /** @overload + @param node File node - the collection to iterate over; + it can be a scalar (equivalent to 1-element collection) or "none" (equivalent to empty collection). + @param seekEnd - true if iterator needs to be set after the last element of the node; + that is: + * node.begin() => FileNodeIterator(node, false) + * node.end() => FileNodeIterator(node, true) + */ + FileNodeIterator(const FileNode& node, bool seekEnd); + + /** @overload + @param it Iterator to be used as initialization for the created iterator. + */ + FileNodeIterator(const FileNodeIterator& it); + + FileNodeIterator& operator=(const FileNodeIterator& it); + + //! returns the currently observed element + FileNode operator *() const; + + //! moves iterator to the next node + FileNodeIterator& operator ++ (); + //! moves iterator to the next node + FileNodeIterator operator ++ (int); + //! moves iterator forward by the specified offset (possibly negative) + FileNodeIterator& operator += (int ofs); + + /** @brief Reads node elements to the buffer with the specified format. + + Usually it is more convenient to use operator `>>` instead of this method. + @param fmt Specification of each array element. See @ref format_spec "format specification" + @param vec Pointer to the destination array. + @param len Number of bytes to read (buffer size limit). If it is greater than number of + remaining elements then all of them will be read. + */ + FileNodeIterator& readRaw( const String& fmt, void* vec, + size_t len=(size_t)INT_MAX ); + + //! returns the number of remaining (not read yet) elements + size_t remaining() const; + + bool equalTo(const FileNodeIterator& it) const; + +protected: + FileStorage::Impl* fs; + size_t blockIdx; + size_t ofs; + size_t blockSize; + size_t nodeNElems; + size_t idx; +}; + +//! @} core_xml + +/////////////////// XML & YAML I/O implementation ////////////////// + +//! @relates cv::FileStorage +//! @{ + +CV_EXPORTS void write( FileStorage& fs, const String& name, int value ); +CV_EXPORTS void write( FileStorage& fs, const String& name, float value ); +CV_EXPORTS void write( FileStorage& fs, const String& name, double value ); +CV_EXPORTS void write( FileStorage& fs, const String& name, const String& value ); +CV_EXPORTS void write( FileStorage& fs, const String& name, const Mat& value ); +CV_EXPORTS void write( FileStorage& fs, const String& name, const SparseMat& value ); +#ifdef CV__LEGACY_PERSISTENCE +CV_EXPORTS void write( FileStorage& fs, const String& name, const std::vector& value); +CV_EXPORTS void write( FileStorage& fs, const String& name, const std::vector& value); +#endif + +CV_EXPORTS void writeScalar( FileStorage& fs, int value ); +CV_EXPORTS void writeScalar( FileStorage& fs, float value ); +CV_EXPORTS void writeScalar( FileStorage& fs, double value ); +CV_EXPORTS void writeScalar( FileStorage& fs, const String& value ); + +//! @} + +//! @relates cv::FileNode +//! @{ + +CV_EXPORTS void read(const FileNode& node, int& value, int default_value); +CV_EXPORTS void read(const FileNode& node, float& value, float default_value); +CV_EXPORTS void read(const FileNode& node, double& value, double default_value); +CV_EXPORTS void read(const FileNode& node, std::string& value, const std::string& default_value); +CV_EXPORTS void read(const FileNode& node, Mat& mat, const Mat& default_mat = Mat() ); +CV_EXPORTS void read(const FileNode& node, SparseMat& mat, const SparseMat& default_mat = SparseMat() ); +#ifdef CV__LEGACY_PERSISTENCE +CV_EXPORTS void read(const FileNode& node, std::vector& keypoints); +CV_EXPORTS void read(const FileNode& node, std::vector& matches); +#endif +CV_EXPORTS void read(const FileNode& node, KeyPoint& value, const KeyPoint& default_value); +CV_EXPORTS void read(const FileNode& node, DMatch& value, const DMatch& default_value); + +template static inline void read(const FileNode& node, Point_<_Tp>& value, const Point_<_Tp>& default_value) +{ + std::vector<_Tp> temp; FileNodeIterator it = node.begin(); it >> temp; + value = temp.size() != 2 ? default_value : Point_<_Tp>(saturate_cast<_Tp>(temp[0]), saturate_cast<_Tp>(temp[1])); +} + +template static inline void read(const FileNode& node, Point3_<_Tp>& value, const Point3_<_Tp>& default_value) +{ + std::vector<_Tp> temp; FileNodeIterator it = node.begin(); it >> temp; + value = temp.size() != 3 ? default_value : Point3_<_Tp>(saturate_cast<_Tp>(temp[0]), saturate_cast<_Tp>(temp[1]), + saturate_cast<_Tp>(temp[2])); +} + +template static inline void read(const FileNode& node, Size_<_Tp>& value, const Size_<_Tp>& default_value) +{ + std::vector<_Tp> temp; FileNodeIterator it = node.begin(); it >> temp; + value = temp.size() != 2 ? default_value : Size_<_Tp>(saturate_cast<_Tp>(temp[0]), saturate_cast<_Tp>(temp[1])); +} + +template static inline void read(const FileNode& node, Complex<_Tp>& value, const Complex<_Tp>& default_value) +{ + std::vector<_Tp> temp; FileNodeIterator it = node.begin(); it >> temp; + value = temp.size() != 2 ? default_value : Complex<_Tp>(saturate_cast<_Tp>(temp[0]), saturate_cast<_Tp>(temp[1])); +} + +template static inline void read(const FileNode& node, Rect_<_Tp>& value, const Rect_<_Tp>& default_value) +{ + std::vector<_Tp> temp; FileNodeIterator it = node.begin(); it >> temp; + value = temp.size() != 4 ? default_value : Rect_<_Tp>(saturate_cast<_Tp>(temp[0]), saturate_cast<_Tp>(temp[1]), + saturate_cast<_Tp>(temp[2]), saturate_cast<_Tp>(temp[3])); +} + +template static inline void read(const FileNode& node, Vec<_Tp, cn>& value, const Vec<_Tp, cn>& default_value) +{ + std::vector<_Tp> temp; FileNodeIterator it = node.begin(); it >> temp; + value = temp.size() != cn ? default_value : Vec<_Tp, cn>(&temp[0]); +} + +template static inline void read(const FileNode& node, Matx<_Tp, m, n>& value, const Matx<_Tp, m, n>& default_matx = Matx<_Tp, m, n>()) +{ + Mat temp; + read(node, temp); // read as a Mat class + + if (temp.empty()) + value = default_matx; + else + value = Matx<_Tp, m, n>(temp); +} + +template static inline void read(const FileNode& node, Scalar_<_Tp>& value, const Scalar_<_Tp>& default_value) +{ + std::vector<_Tp> temp; FileNodeIterator it = node.begin(); it >> temp; + value = temp.size() != 4 ? default_value : Scalar_<_Tp>(saturate_cast<_Tp>(temp[0]), saturate_cast<_Tp>(temp[1]), + saturate_cast<_Tp>(temp[2]), saturate_cast<_Tp>(temp[3])); +} + +static inline void read(const FileNode& node, Range& value, const Range& default_value) +{ + Point2i temp(value.start, value.end); const Point2i default_temp = Point2i(default_value.start, default_value.end); + read(node, temp, default_temp); + value.start = temp.x; value.end = temp.y; +} + +//! @} + +/** @brief Writes string to a file storage. +@relates cv::FileStorage + */ +CV_EXPORTS FileStorage& operator << (FileStorage& fs, const String& str); + +//! @cond IGNORED + +namespace internal +{ + class CV_EXPORTS WriteStructContext + { + public: + WriteStructContext(FileStorage& _fs, const String& name, int flags, const String& typeName = String()); + ~WriteStructContext(); + private: + FileStorage* fs; + }; + + template class VecWriterProxy + { + public: + VecWriterProxy( FileStorage* _fs ) : fs(_fs) {} + void operator()(const std::vector<_Tp>& vec) const + { + size_t count = vec.size(); + for (size_t i = 0; i < count; i++) + write(*fs, vec[i]); + } + private: + FileStorage* fs; + }; + + template class VecWriterProxy<_Tp, 1> + { + public: + VecWriterProxy( FileStorage* _fs ) : fs(_fs) {} + void operator()(const std::vector<_Tp>& vec) const + { + int _fmt = traits::SafeFmt<_Tp>::fmt; + char fmt[] = { (char)((_fmt >> 8) + '1'), (char)_fmt, '\0' }; + fs->writeRaw(fmt, !vec.empty() ? (uchar*)&vec[0] : 0, vec.size() * sizeof(_Tp)); + } + private: + FileStorage* fs; + }; + + template class VecReaderProxy + { + public: + VecReaderProxy( FileNodeIterator* _it ) : it(_it) {} + void operator()(std::vector<_Tp>& vec, size_t count) const + { + count = std::min(count, it->remaining()); + vec.resize(count); + for (size_t i = 0; i < count; i++, ++(*it)) + read(**it, vec[i], _Tp()); + } + private: + FileNodeIterator* it; + }; + + template class VecReaderProxy<_Tp, 1> + { + public: + VecReaderProxy( FileNodeIterator* _it ) : it(_it) {} + void operator()(std::vector<_Tp>& vec, size_t count) const + { + size_t remaining = it->remaining(); + size_t cn = DataType<_Tp>::channels; + int _fmt = traits::SafeFmt<_Tp>::fmt; + CV_Assert((_fmt >> 8) < 9); + char fmt[] = { (char)((_fmt >> 8)+'1'), (char)_fmt, '\0' }; + CV_Assert((remaining % cn) == 0); + size_t remaining1 = remaining / cn; + count = count > remaining1 ? remaining1 : count; + vec.resize(count); + it->readRaw(fmt, !vec.empty() ? (uchar*)&vec[0] : 0, count*sizeof(_Tp)); + } + private: + FileNodeIterator* it; + }; + +} // internal + +//! @endcond + +//! @relates cv::FileStorage +//! @{ + +template static inline +void write(FileStorage& fs, const _Tp& value) +{ + write(fs, String(), value); +} + +template<> inline +void write( FileStorage& fs, const int& value ) +{ + writeScalar(fs, value); +} + +template<> inline +void write( FileStorage& fs, const float& value ) +{ + writeScalar(fs, value); +} + +template<> inline +void write( FileStorage& fs, const double& value ) +{ + writeScalar(fs, value); +} + +template<> inline +void write( FileStorage& fs, const String& value ) +{ + writeScalar(fs, value); +} + +template static inline +void write(FileStorage& fs, const Point_<_Tp>& pt ) +{ + write(fs, pt.x); + write(fs, pt.y); +} + +template static inline +void write(FileStorage& fs, const Point3_<_Tp>& pt ) +{ + write(fs, pt.x); + write(fs, pt.y); + write(fs, pt.z); +} + +template static inline +void write(FileStorage& fs, const Size_<_Tp>& sz ) +{ + write(fs, sz.width); + write(fs, sz.height); +} + +template static inline +void write(FileStorage& fs, const Complex<_Tp>& c ) +{ + write(fs, c.re); + write(fs, c.im); +} + +template static inline +void write(FileStorage& fs, const Rect_<_Tp>& r ) +{ + write(fs, r.x); + write(fs, r.y); + write(fs, r.width); + write(fs, r.height); +} + +template static inline +void write(FileStorage& fs, const Vec<_Tp, cn>& v ) +{ + for(int i = 0; i < cn; i++) + write(fs, v.val[i]); +} + +template static inline +void write(FileStorage& fs, const Matx<_Tp, m, n>& x ) +{ + write(fs, Mat(x)); // write as a Mat class +} + +template static inline +void write(FileStorage& fs, const Scalar_<_Tp>& s ) +{ + write(fs, s.val[0]); + write(fs, s.val[1]); + write(fs, s.val[2]); + write(fs, s.val[3]); +} + +static inline +void write(FileStorage& fs, const Range& r ) +{ + write(fs, r.start); + write(fs, r.end); +} + +template static inline +void write( FileStorage& fs, const std::vector<_Tp>& vec ) +{ + cv::internal::VecWriterProxy<_Tp, traits::SafeFmt<_Tp>::fmt != 0> w(&fs); + w(vec); +} + +template static inline +void write(FileStorage& fs, const String& name, const Point_<_Tp>& pt ) +{ + cv::internal::WriteStructContext ws(fs, name, FileNode::SEQ+FileNode::FLOW); + write(fs, pt); +} + +template static inline +void write(FileStorage& fs, const String& name, const Point3_<_Tp>& pt ) +{ + cv::internal::WriteStructContext ws(fs, name, FileNode::SEQ+FileNode::FLOW); + write(fs, pt); +} + +template static inline +void write(FileStorage& fs, const String& name, const Size_<_Tp>& sz ) +{ + cv::internal::WriteStructContext ws(fs, name, FileNode::SEQ+FileNode::FLOW); + write(fs, sz); +} + +template static inline +void write(FileStorage& fs, const String& name, const Complex<_Tp>& c ) +{ + cv::internal::WriteStructContext ws(fs, name, FileNode::SEQ+FileNode::FLOW); + write(fs, c); +} + +template static inline +void write(FileStorage& fs, const String& name, const Rect_<_Tp>& r ) +{ + cv::internal::WriteStructContext ws(fs, name, FileNode::SEQ+FileNode::FLOW); + write(fs, r); +} + +template static inline +void write(FileStorage& fs, const String& name, const Vec<_Tp, cn>& v ) +{ + cv::internal::WriteStructContext ws(fs, name, FileNode::SEQ+FileNode::FLOW); + write(fs, v); +} + +template static inline +void write(FileStorage& fs, const String& name, const Matx<_Tp, m, n>& x ) +{ + write(fs, name, Mat(x)); // write as a Mat class +} + +template static inline +void write(FileStorage& fs, const String& name, const Scalar_<_Tp>& s ) +{ + cv::internal::WriteStructContext ws(fs, name, FileNode::SEQ+FileNode::FLOW); + write(fs, s); +} + +static inline +void write(FileStorage& fs, const String& name, const Range& r ) +{ + cv::internal::WriteStructContext ws(fs, name, FileNode::SEQ+FileNode::FLOW); + write(fs, r); +} + +static inline +void write(FileStorage& fs, const String& name, const KeyPoint& kpt) +{ + cv::internal::WriteStructContext ws(fs, name, FileNode::SEQ+FileNode::FLOW); + write(fs, kpt.pt.x); + write(fs, kpt.pt.y); + write(fs, kpt.size); + write(fs, kpt.angle); + write(fs, kpt.response); + write(fs, kpt.octave); + write(fs, kpt.class_id); +} + +static inline +void write(FileStorage& fs, const String& name, const DMatch& m) +{ + cv::internal::WriteStructContext ws(fs, name, FileNode::SEQ+FileNode::FLOW); + write(fs, m.queryIdx); + write(fs, m.trainIdx); + write(fs, m.imgIdx); + write(fs, m.distance); +} + +template::value >::type* = nullptr> +static inline void write( FileStorage& fs, const String& name, const _Tp& val ) +{ + write(fs, name, static_cast(val)); +} + +template static inline +void write( FileStorage& fs, const String& name, const std::vector<_Tp>& vec ) +{ + cv::internal::WriteStructContext ws(fs, name, FileNode::SEQ+(traits::SafeFmt<_Tp>::fmt != 0 ? FileNode::FLOW : 0)); + write(fs, vec); +} + +template static inline +void write( FileStorage& fs, const String& name, const std::vector< std::vector<_Tp> >& vec ) +{ + cv::internal::WriteStructContext ws(fs, name, FileNode::SEQ); + for(size_t i = 0; i < vec.size(); i++) + { + cv::internal::WriteStructContext ws_(fs, name, FileNode::SEQ+(traits::SafeFmt<_Tp>::fmt != 0 ? FileNode::FLOW : 0)); + write(fs, vec[i]); + } +} + +#ifdef CV__LEGACY_PERSISTENCE +// This code is not needed anymore, but it is preserved here to keep source compatibility +// Implementation is similar to templates instantiations +static inline void write(FileStorage& fs, const KeyPoint& kpt) { write(fs, String(), kpt); } +static inline void write(FileStorage& fs, const DMatch& m) { write(fs, String(), m); } +static inline void write(FileStorage& fs, const std::vector& vec) +{ + cv::internal::VecWriterProxy w(&fs); + w(vec); +} +static inline void write(FileStorage& fs, const std::vector& vec) +{ + cv::internal::VecWriterProxy w(&fs); + w(vec); + +} +#endif + +//! @} FileStorage + +//! @relates cv::FileNode +//! @{ + +static inline +void read(const FileNode& node, bool& value, bool default_value) +{ + int temp; + read(node, temp, (int)default_value); + value = temp != 0; +} + +static inline +void read(const FileNode& node, uchar& value, uchar default_value) +{ + int temp; + read(node, temp, (int)default_value); + value = saturate_cast(temp); +} + +static inline +void read(const FileNode& node, schar& value, schar default_value) +{ + int temp; + read(node, temp, (int)default_value); + value = saturate_cast(temp); +} + +static inline +void read(const FileNode& node, ushort& value, ushort default_value) +{ + int temp; + read(node, temp, (int)default_value); + value = saturate_cast(temp); +} + +static inline +void read(const FileNode& node, short& value, short default_value) +{ + int temp; + read(node, temp, (int)default_value); + value = saturate_cast(temp); +} + +template static inline +void read( FileNodeIterator& it, std::vector<_Tp>& vec, size_t maxCount = (size_t)INT_MAX ) +{ + cv::internal::VecReaderProxy<_Tp, traits::SafeFmt<_Tp>::fmt != 0> r(&it); + r(vec, maxCount); +} + +template::value >::type* = nullptr> +static inline void read(const FileNode& node, _Tp& value, const _Tp& default_value = static_cast<_Tp>(0)) +{ + int temp; + read(node, temp, static_cast(default_value)); + value = static_cast<_Tp>(temp); +} + +template static inline +void read( const FileNode& node, std::vector<_Tp>& vec, const std::vector<_Tp>& default_value = std::vector<_Tp>() ) +{ + if(node.empty()) + vec = default_value; + else + { + FileNodeIterator it = node.begin(); + read( it, vec ); + } +} + +static inline +void read( const FileNode& node, std::vector& vec, const std::vector& default_value ) +{ + if(node.empty()) + vec = default_value; + else + read(node, vec); +} + +static inline +void read( const FileNode& node, std::vector& vec, const std::vector& default_value ) +{ + if(node.empty()) + vec = default_value; + else + read(node, vec); +} + +//! @} FileNode + +//! @relates cv::FileStorage +//! @{ + +/** @brief Writes data to a file storage. + */ +template static inline +FileStorage& operator << (FileStorage& fs, const _Tp& value) +{ + if( !fs.isOpened() ) + return fs; + if( fs.state == FileStorage::NAME_EXPECTED + FileStorage::INSIDE_MAP ) + CV_Error( Error::StsError, "No element name has been given" ); + write( fs, fs.elname, value ); + if( fs.state & FileStorage::INSIDE_MAP ) + fs.state = FileStorage::NAME_EXPECTED + FileStorage::INSIDE_MAP; + return fs; +} + +/** @brief Writes data to a file storage. + */ +static inline +FileStorage& operator << (FileStorage& fs, const char* str) +{ + return (fs << String(str)); +} + +/** @brief Writes data to a file storage. + */ +static inline +FileStorage& operator << (FileStorage& fs, char* value) +{ + return (fs << String(value)); +} + +//! @} FileStorage + +//! @relates cv::FileNodeIterator +//! @{ + +/** @brief Reads data from a file storage. + */ +template static inline +FileNodeIterator& operator >> (FileNodeIterator& it, _Tp& value) +{ + read( *it, value, _Tp()); + return ++it; +} + +/** @brief Reads data from a file storage. + */ +template static inline +FileNodeIterator& operator >> (FileNodeIterator& it, std::vector<_Tp>& vec) +{ + cv::internal::VecReaderProxy<_Tp, traits::SafeFmt<_Tp>::fmt != 0> r(&it); + r(vec, (size_t)INT_MAX); + return it; +} + +//! @} FileNodeIterator + +//! @relates cv::FileNode +//! @{ + +/** @brief Reads data from a file storage. + */ +template static inline +void operator >> (const FileNode& n, _Tp& value) +{ + read( n, value, _Tp()); +} + +/** @brief Reads data from a file storage. + */ +template static inline +void operator >> (const FileNode& n, std::vector<_Tp>& vec) +{ + FileNodeIterator it = n.begin(); + it >> vec; +} + +/** @brief Reads KeyPoint from a file storage. +*/ +//It needs special handling because it contains two types of fields, int & float. +static inline +void operator >> (const FileNode& n, KeyPoint& kpt) +{ + FileNodeIterator it = n.begin(); + it >> kpt.pt.x >> kpt.pt.y >> kpt.size >> kpt.angle >> kpt.response >> kpt.octave >> kpt.class_id; +} + +#ifdef CV__LEGACY_PERSISTENCE +static inline +void operator >> (const FileNode& n, std::vector& vec) +{ + read(n, vec); +} +static inline +void operator >> (const FileNode& n, std::vector& vec) +{ + read(n, vec); +} +#endif + +/** @brief Reads DMatch from a file storage. +*/ +//It needs special handling because it contains two types of fields, int & float. +static inline +void operator >> (const FileNode& n, DMatch& m) +{ + FileNodeIterator it = n.begin(); + it >> m.queryIdx >> m.trainIdx >> m.imgIdx >> m.distance; +} + +//! @} FileNode + +//! @relates cv::FileNodeIterator +//! @{ + +CV_EXPORTS bool operator == (const FileNodeIterator& it1, const FileNodeIterator& it2); +CV_EXPORTS bool operator != (const FileNodeIterator& it1, const FileNodeIterator& it2); + +static inline +ptrdiff_t operator - (const FileNodeIterator& it1, const FileNodeIterator& it2) +{ + return it2.remaining() - it1.remaining(); +} + +static inline +bool operator < (const FileNodeIterator& it1, const FileNodeIterator& it2) +{ + return it1.remaining() > it2.remaining(); +} + +//! @} FileNodeIterator + +} // cv + +#endif // OPENCV_CORE_PERSISTENCE_HPP diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/quaternion.hpp b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/quaternion.hpp new file mode 100644 index 0000000..7bc51e6 --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/quaternion.hpp @@ -0,0 +1,1515 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2020, Huawei Technologies Co., Ltd. All rights reserved. +// Third party copyrights are property of their respective owners. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Author: Liangqian Kong +// Longbu Wang +#ifndef OPENCV_CORE_QUATERNION_HPP +#define OPENCV_CORE_QUATERNION_HPP + +#include +#include +namespace cv +{ +//! @addtogroup core +//! @{ + +//! Unit quaternion flag +enum QuatAssumeType +{ + /** + * This flag is specified by default. + * If this flag is specified, the input quaternions are assumed to be not unit quaternions. + * It can guarantee the correctness of the calculations, + * although the calculation speed will be slower than the flag QUAT_ASSUME_UNIT. + */ + QUAT_ASSUME_NOT_UNIT, + /** + * If this flag is specified, the input quaternions are assumed to be unit quaternions which + * will save some computations. However, if this flag is specified without unit quaternion, + * the program correctness of the result will not be guaranteed. + */ + QUAT_ASSUME_UNIT +}; + +template class Quat; +template std::ostream& operator<<(std::ostream&, const Quat<_Tp>&); + +/** + * Quaternion is a number system that extends the complex numbers. It can be expressed as a + * rotation in three-dimensional space. + * A quaternion is generally represented in the form: + * \f[q = w + x\boldsymbol{i} + y\boldsymbol{j} + z\boldsymbol{k}\f] + * \f[q = [w, x, y, z]\f] + * \f[q = [w, \boldsymbol{v}] \f] + * \f[q = ||q||[\cos\psi, u_x\sin\psi,u_y\sin\psi, u_z\sin\psi].\f] + * \f[q = ||q||[\cos\psi, \boldsymbol{u}\sin\psi]\f] + * where \f$\psi = \frac{\theta}{2}\f$, \f$\theta\f$ represents rotation angle, + * \f$\boldsymbol{u} = [u_x, u_y, u_z]\f$ represents normalized rotation axis, + * and \f$||q||\f$ represents the norm of \f$q\f$. + * + * A unit quaternion is usually represents rotation, which has the form: + * \f[q = [\cos\psi, u_x\sin\psi,u_y\sin\psi, u_z\sin\psi].\f] + * + * To create a quaternion representing the rotation around the axis \f$\boldsymbol{u}\f$ + * with angle \f$\theta\f$, you can use + * ``` + * using namespace cv; + * double angle = CV_PI; + * Vec3d axis = {0, 0, 1}; + * Quatd q = Quatd::createFromAngleAxis(angle, axis); + * ``` + * + * You can simply use four same type number to create a quaternion + * ``` + * Quatd q(1, 2, 3, 4); + * ``` + * Or use a Vec4d or Vec4f vector. + * ``` + * Vec4d vec{1, 2, 3, 4}; + * Quatd q(vec); + * ``` + * + * ``` + * Vec4f vec{1, 2, 3, 4}; + * Quatf q(vec); + * ``` + * + * If you already have a 3x3 rotation matrix R, then you can use + * ``` + * Quatd q = Quatd::createFromRotMat(R); + * ``` + * + * If you already have a rotation vector rvec which has the form of `angle * axis`, then you can use + * ``` + * Quatd q = Quatd::createFromRvec(rvec); + * ``` + * + * To extract the rotation matrix from quaternion, see toRotMat3x3() + * + * To extract the Vec4d or Vec4f, see toVec() + * + * To extract the rotation vector, see toRotVec() + * + * If there are two quaternions \f$q_0, q_1\f$ are needed to interpolate, you can use nlerp(), slerp() or spline() + * ``` + * Quatd::nlerp(q0, q1, t) + * + * Quatd::slerp(q0, q1, t) + * + * Quatd::spline(q0, q0, q1, q1, t) + * ``` + * spline can smoothly connect rotations of multiple quaternions + * + * Three ways to get an element in Quaternion + * ``` + * Quatf q(1,2,3,4); + * std::cout << q.w << std::endl; // w=1, x=2, y=3, z=4 + * std::cout << q[0] << std::endl; // q[0]=1, q[1]=2, q[2]=3, q[3]=4 + * std::cout << q.at(0) << std::endl; + * ``` + */ +template +class Quat +{ + static_assert(std::is_floating_point<_Tp>::value, "Quaternion only make sense with type of float or double"); + using value_type = _Tp; + +public: + static constexpr _Tp CV_QUAT_EPS = (_Tp)1.e-6; + + Quat(); + + /** + * @brief From Vec4d or Vec4f. + */ + explicit Quat(const Vec<_Tp, 4> &coeff); + + /** + * @brief from four numbers. + */ + Quat(_Tp w, _Tp x, _Tp y, _Tp z); + + /** + * @brief from an angle, axis. Axis will be normalized in this function. And + * it generates + * \f[q = [\cos\psi, u_x\sin\psi,u_y\sin\psi, u_z\sin\psi].\f] + * where \f$\psi = \frac{\theta}{2}\f$, \f$\theta\f$ is the rotation angle. + */ + static Quat<_Tp> createFromAngleAxis(const _Tp angle, const Vec<_Tp, 3> &axis); + + /** + * @brief from a 3x3 rotation matrix. + */ + static Quat<_Tp> createFromRotMat(InputArray R); + + /** + * @brief from a rotation vector + * \f$r\f$ has the form \f$\theta \cdot \boldsymbol{u}\f$, where \f$\theta\f$ + * represents rotation angle and \f$\boldsymbol{u}\f$ represents normalized rotation axis. + * + * Angle and axis could be easily derived as: + * \f[ + * \begin{equation} + * \begin{split} + * \psi &= ||r||\\ + * \boldsymbol{u} &= \frac{r}{\theta} + * \end{split} + * \end{equation} + * \f] + * Then a quaternion can be calculated by + * \f[q = [\cos\psi, \boldsymbol{u}\sin\psi]\f] + * where \f$\psi = \theta / 2 \f$ + */ + static Quat<_Tp> createFromRvec(InputArray rvec); + + /** + * @brief a way to get element. + * @param index over a range [0, 3]. + * + * A quaternion q + * + * q.at(0) is equivalent to q.w, + * + * q.at(1) is equivalent to q.x, + * + * q.at(2) is equivalent to q.y, + * + * q.at(3) is equivalent to q.z. + */ + _Tp at(size_t index) const; + + /** + * @brief return the conjugate of this quaternion. + * \f[q.conjugate() = (w, -x, -y, -z).\f] + */ + Quat<_Tp> conjugate() const; + + /** + * + * @brief return the value of exponential value. + * \f[\exp(q) = e^w (\cos||\boldsymbol{v}||+ \frac{v}{||\boldsymbol{v}||})\sin||\boldsymbol{v}||\f] + * where \f$\boldsymbol{v} = [x, y, z].\f$ + * @param q a quaternion. + * + * For example: + * ``` + * Quatd q{1,2,3,4}; + * cout << exp(q) << endl; + * ``` + */ + template + friend Quat exp(const Quat &q); + + /** + * @brief return the value of exponential value. + * \f[\exp(q) = e^w (\cos||\boldsymbol{v}||+ \frac{v}{||\boldsymbol{v}||}\sin||\boldsymbol{v}||)\f] + * where \f$\boldsymbol{v} = [x, y, z].\f$ + * + * For example + * ``` + * Quatd q{1,2,3,4}; + * cout << q.exp() << endl; + * ``` + */ + Quat<_Tp> exp() const; + + /** + * @brief return the value of logarithm function. + * \f[\ln(q) = \ln||q|| + \frac{\boldsymbol{v}}{||\boldsymbol{v}||}\arccos\frac{w}{||q||}.\f] + * where \f$\boldsymbol{v} = [x, y, z].\f$ + * @param q a quaternion. + * @param assumeUnit if QUAT_ASSUME_UNIT, q assume to be a unit quaternion and this function will save some computations. + * + * For example + * ``` + * Quatd q1{1,2,3,4}; + * cout << log(q1) << endl; + * ``` + */ + template + friend Quat log(const Quat &q, QuatAssumeType assumeUnit); + + /** + * @brief return the value of logarithm function. + * \f[\ln(q) = \ln||q|| + \frac{\boldsymbol{v}}{||\boldsymbol{v}||}\arccos\frac{w}{||q||}\f]. + * where \f$\boldsymbol{v} = [x, y, z].\f$ + * @param assumeUnit if QUAT_ASSUME_UNIT, this quaternion assume to be a unit quaternion and this function will save some computations. + * + * For example + * ``` + * Quatd q(1,2,3,4); + * q.log(); + * + * QuatAssumeType assumeUnit = QUAT_ASSUME_UNIT; + * Quatd q1(1,2,3,4); + * q1.normalize().log(assumeUnit); + * ``` + */ + Quat<_Tp> log(QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT) const; + + /** + * @brief return the value of power function with index \f$x\f$. + * \f[q^x = ||q||(cos(x\theta) + \boldsymbol{u}sin(x\theta))).\f] + * @param q a quaternion. + * @param x index of exponentiation. + * @param assumeUnit if QUAT_ASSUME_UNIT, quaternion q assume to be a unit quaternion and this function will save some computations. + * + * For example + * ``` + * Quatd q(1,2,3,4); + * power(q, 2.0); + * + * QuatAssumeType assumeUnit = QUAT_ASSUME_UNIT; + * double angle = CV_PI; + * Vec3d axis{0, 0, 1}; + * Quatd q1 = Quatd::createFromAngleAxis(angle, axis); //generate a unit quat by axis and angle + * power(q1, 2.0, assumeUnit);//This assumeUnit means q1 is a unit quaternion. + * ``` + * @note the type of the index should be the same as the quaternion. + */ + template + friend Quat power(const Quat &q, const T x, QuatAssumeType assumeUnit); + + /** + * @brief return the value of power function with index \f$x\f$. + * \f[q^x = ||q||(\cos(x\theta) + \boldsymbol{u}\sin(x\theta))).\f] + * @param x index of exponentiation. + * @param assumeUnit if QUAT_ASSUME_UNIT, this quaternion assume to be a unit quaternion and this function will save some computations. + * + * For example + * ``` + * Quatd q(1,2,3,4); + * q.power(2.0); + * + * QuatAssumeType assumeUnit = QUAT_ASSUME_UNIT; + * double angle = CV_PI; + * Vec3d axis{0, 0, 1}; + * Quatd q1 = Quatd::createFromAngleAxis(angle, axis); //generate a unit quat by axis and angle + * q1.power(2.0, assumeUnit); //This assumeUnt means q1 is a unit quaternion + * ``` + */ + Quat<_Tp> power(const _Tp x, QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT) const; + + /** + * @brief return \f$\sqrt{q}\f$. + * @param q a quaternion. + * @param assumeUnit if QUAT_ASSUME_UNIT, quaternion q assume to be a unit quaternion and this function will save some computations. + * + * For example + * ``` + * Quatf q(1,2,3,4); + * sqrt(q); + * + * QuatAssumeType assumeUnit = QUAT_ASSUME_UNIT; + * q = {1,0,0,0}; + * sqrt(q, assumeUnit); //This assumeUnit means q is a unit quaternion. + * ``` + */ + template + friend Quat sqrt(const Quat &q, QuatAssumeType assumeUnit); + + /** + * @brief return \f$\sqrt{q}\f$. + * @param assumeUnit if QUAT_ASSUME_UNIT, this quaternion assume to be a unit quaternion and this function will save some computations. + * + * For example + * ``` + * Quatf q(1,2,3,4); + * q.sqrt(); + * + * QuatAssumeType assumeUnit = QUAT_ASSUME_UNIT; + * q = {1,0,0,0}; + * q.sqrt(assumeUnit); //This assumeUnit means q is a unit quaternion + * ``` + */ + Quat<_Tp> sqrt(QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT) const; + + /** + * @brief return the value of power function with quaternion \f$q\f$. + * \f[p^q = e^{q\ln(p)}.\f] + * @param p base quaternion of power function. + * @param q index quaternion of power function. + * @param assumeUnit if QUAT_ASSUME_UNIT, quaternion \f$p\f$ assume to be a unit quaternion and this function will save some computations. + * + * For example + * ``` + * Quatd p(1,2,3,4); + * Quatd q(5,6,7,8); + * power(p, q); + * + * QuatAssumeType assumeUnit = QUAT_ASSUME_UNIT; + * p = p.normalize(); + * power(p, q, assumeUnit); //This assumeUnit means p is a unit quaternion + * ``` + */ + template + friend Quat power(const Quat &p, const Quat &q, QuatAssumeType assumeUnit); + + /** + * @brief return the value of power function with quaternion \f$q\f$. + * \f[p^q = e^{q\ln(p)}.\f] + * @param q index quaternion of power function. + * @param assumeUnit if QUAT_ASSUME_UNIT, this quaternion assume to be a unit quaternion and this function will save some computations. + * + * For example + * ``` + * Quatd p(1,2,3,4); + * Quatd q(5,6,7,8); + * p.power(q); + * + * QuatAssumeType assumeUnit = QUAT_ASSUME_UNIT; + * p = p.normalize(); + * p.power(q, assumeUnit); //This assumeUnit means p is a unit quaternion + * ``` + */ + Quat<_Tp> power(const Quat<_Tp> &q, QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT) const; + + /** + * @brief return the crossProduct between \f$p = (a, b, c, d) = (a, \boldsymbol{u})\f$ and \f$q = (w, x, y, z) = (w, \boldsymbol{v})\f$. + * \f[p \times q = \frac{pq- qp}{2}\f] + * \f[p \times q = \boldsymbol{u} \times \boldsymbol{v}\f] + * \f[p \times q = (cz-dy)i + (dx-bz)j + (by-xc)k \f] + * + * For example + * ``` + * Quatd q{1,2,3,4}; + * Quatd p{5,6,7,8}; + * crossProduct(p, q); + * ``` + */ + template + friend Quat crossProduct(const Quat &p, const Quat &q); + + /** + * @brief return the crossProduct between \f$p = (a, b, c, d) = (a, \boldsymbol{u})\f$ and \f$q = (w, x, y, z) = (w, \boldsymbol{v})\f$. + * \f[p \times q = \frac{pq- qp}{2}.\f] + * \f[p \times q = \boldsymbol{u} \times \boldsymbol{v}.\f] + * \f[p \times q = (cz-dy)i + (dx-bz)j + (by-xc)k. \f] + * + * For example + * ``` + * Quatd q{1,2,3,4}; + * Quatd p{5,6,7,8}; + * p.crossProduct(q) + * ``` + */ + Quat<_Tp> crossProduct(const Quat<_Tp> &q) const; + + /** + * @brief return the norm of quaternion. + * \f[||q|| = \sqrt{w^2 + x^2 + y^2 + z^2}.\f] + */ + _Tp norm() const; + + /** + * @brief return a normalized \f$p\f$. + * \f[p = \frac{q}{||q||}\f] + * where \f$p\f$ satisfies \f$(p.x)^2 + (p.y)^2 + (p.z)^2 + (p.w)^2 = 1.\f$ + */ + Quat<_Tp> normalize() const; + + /** + * @brief return \f$q^{-1}\f$ which is an inverse of \f$q\f$ + * which satisfies \f$q * q^{-1} = 1\f$. + * @param q a quaternion. + * @param assumeUnit if QUAT_ASSUME_UNIT, quaternion q assume to be a unit quaternion and this function will save some computations. + * + * For example + * ``` + * Quatd q(1,2,3,4); + * inv(q); + * + * QuatAssumeType assumeUnit = QUAT_ASSUME_UNIT; + * q = q.normalize(); + * inv(q, assumeUnit);//This assumeUnit means p is a unit quaternion + * ``` + */ + template + friend Quat inv(const Quat &q, QuatAssumeType assumeUnit); + + /** + * @brief return \f$q^{-1}\f$ which is an inverse of \f$q\f$ + * satisfying \f$q * q^{-1} = 1\f$. + * @param assumeUnit if QUAT_ASSUME_UNIT, quaternion q assume to be a unit quaternion and this function will save some computations. + * + * For example + * ``` + * Quatd q(1,2,3,4); + * q.inv(); + * + * QuatAssumeType assumeUnit = QUAT_ASSUME_UNIT; + * q = q.normalize(); + * q.inv(assumeUnit); //assumeUnit means p is a unit quaternion + * ``` + */ + Quat<_Tp> inv(QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT) const; + + /** + * @brief return sinh value of quaternion q, sinh could be calculated as: + * \f[\sinh(p) = \sin(w)\cos(||\boldsymbol{v}||) + \cosh(w)\frac{v}{||\boldsymbol{v}||}\sin||\boldsymbol{v}||\f] + * where \f$\boldsymbol{v} = [x, y, z].\f$ + * @param q a quaternion. + * + * For example + * ``` + * Quatd q(1,2,3,4); + * sinh(q); + * ``` + */ + template + friend Quat sinh(const Quat &q); + + /** + * @brief return sinh value of this quaternion, sinh could be calculated as: + * \f$\sinh(p) = \sin(w)\cos(||\boldsymbol{v}||) + \cosh(w)\frac{v}{||\boldsymbol{v}||}\sin||\boldsymbol{v}||\f$ + * where \f$\boldsymbol{v} = [x, y, z].\f$ + * + * For example + * ``` + * Quatd q(1,2,3,4); + * q.sinh(); + * ``` + */ + Quat<_Tp> sinh() const; + + /** + * @brief return cosh value of quaternion q, cosh could be calculated as: + * \f[\cosh(p) = \cosh(w) * \cos(||\boldsymbol{v}||) + \sinh(w)\frac{\boldsymbol{v}}{||\boldsymbol{v}||}\sin(||\boldsymbol{v}||)\f] + * where \f$\boldsymbol{v} = [x, y, z].\f$ + * @param q a quaternion. + * + * For example + * ``` + * Quatd q(1,2,3,4); + * cosh(q); + * ``` + */ + template + friend Quat cosh(const Quat &q); + + /** + * @brief return cosh value of this quaternion, cosh could be calculated as: + * \f[\cosh(p) = \cosh(w) * \cos(||\boldsymbol{v}||) + \sinh(w)\frac{\boldsymbol{v}}{||\boldsymbol{v}||}sin(||\boldsymbol{v}||)\f] + * where \f$\boldsymbol{v} = [x, y, z].\f$ + * + * For example + * ``` + * Quatd q(1,2,3,4); + * q.cosh(); + * ``` + */ + Quat<_Tp> cosh() const; + + /** + * @brief return tanh value of quaternion q, tanh could be calculated as: + * \f[ \tanh(q) = \frac{\sinh(q)}{\cosh(q)}.\f] + * @param q a quaternion. + * + * For example + * ``` + * Quatd q(1,2,3,4); + * tanh(q); + * ``` + * @sa sinh, cosh + */ + template + friend Quat tanh(const Quat &q); + + /** + * @brief return tanh value of this quaternion, tanh could be calculated as: + * \f[ \tanh(q) = \frac{\sinh(q)}{\cosh(q)}.\f] + * + * For example + * ``` + * Quatd q(1,2,3,4); + * q.tanh(); + * ``` + * @sa sinh, cosh + */ + Quat<_Tp> tanh() const; + + /** + * @brief return tanh value of quaternion q, sin could be calculated as: + * \f[\sin(p) = \sin(w) * \cosh(||\boldsymbol{v}||) + \cos(w)\frac{\boldsymbol{v}}{||\boldsymbol{v}||}\sinh(||\boldsymbol{v}||)\f] + * where \f$\boldsymbol{v} = [x, y, z].\f$ + * @param q a quaternion. + * + * For example + * ``` + * Quatd q(1,2,3,4); + * sin(q); + * ``` + */ + template + friend Quat sin(const Quat &q); + + /** + * @brief return sin value of this quaternion, sin could be calculated as: + * \f[\sin(p) = \sin(w) * \cosh(||\boldsymbol{v}||) + \cos(w)\frac{\boldsymbol{v}}{||\boldsymbol{v}||}\sinh(||\boldsymbol{v}||)\f] + * where \f$\boldsymbol{v} = [x, y, z].\f$ + * + * For example + * ``` + * Quatd q(1,2,3,4); + * q.sin(); + * ``` + */ + Quat<_Tp> sin() const; + + /** + * @brief return sin value of quaternion q, cos could be calculated as: + * \f[\cos(p) = \cos(w) * \cosh(||\boldsymbol{v}||) - \sin(w)\frac{\boldsymbol{v}}{||\boldsymbol{v}||}\sinh(||\boldsymbol{v}||)\f] + * where \f$\boldsymbol{v} = [x, y, z].\f$ + * @param q a quaternion. + * + * For example + * ``` + * Quatd q(1,2,3,4); + * cos(q); + * ``` + */ + template + friend Quat cos(const Quat &q); + + /** + * @brief return cos value of this quaternion, cos could be calculated as: + * \f[\cos(p) = \cos(w) * \cosh(||\boldsymbol{v}||) - \sin(w)\frac{\boldsymbol{v}}{||\boldsymbol{v}||}\sinh(||\boldsymbol{v}||)\f] + * where \f$\boldsymbol{v} = [x, y, z].\f$ + * + * For example + * ``` + * Quatd q(1,2,3,4); + * q.cos(); + * ``` + */ + Quat<_Tp> cos() const; + + /** + * @brief return tan value of quaternion q, tan could be calculated as: + * \f[\tan(q) = \frac{\sin(q)}{\cos(q)}.\f] + * @param q a quaternion. + * + * For example + * ``` + * Quatd q(1,2,3,4); + * tan(q); + * ``` + */ + template + friend Quat tan(const Quat &q); + + /** + * @brief return tan value of this quaternion, tan could be calculated as: + * \f[\tan(q) = \frac{\sin(q)}{\cos(q)}.\f] + * + * For example + * ``` + * Quatd q(1,2,3,4); + * q.tan(); + * ``` + */ + Quat<_Tp> tan() const; + + /** + * @brief return arcsin value of quaternion q, arcsin could be calculated as: + * \f[\arcsin(q) = -\frac{\boldsymbol{v}}{||\boldsymbol{v}||}arcsinh(q\frac{\boldsymbol{v}}{||\boldsymbol{v}||})\f] + * where \f$\boldsymbol{v} = [x, y, z].\f$ + * @param q a quaternion. + * + * For example + * ``` + * Quatd q(1,2,3,4); + * asin(q); + * ``` + */ + template + friend Quat asin(const Quat &q); + + /** + * @brief return arcsin value of this quaternion, arcsin could be calculated as: + * \f[\arcsin(q) = -\frac{\boldsymbol{v}}{||\boldsymbol{v}||}arcsinh(q\frac{\boldsymbol{v}}{||\boldsymbol{v}||})\f] + * where \f$\boldsymbol{v} = [x, y, z].\f$ + * + * For example + * ``` + * Quatd q(1,2,3,4); + * q.asin(); + * ``` + */ + Quat<_Tp> asin() const; + + /** + * @brief return arccos value of quaternion q, arccos could be calculated as: + * \f[\arccos(q) = -\frac{\boldsymbol{v}}{||\boldsymbol{v}||}arccosh(q)\f] + * where \f$\boldsymbol{v} = [x, y, z].\f$ + * @param q a quaternion. + * + * For example + * ``` + * Quatd q(1,2,3,4); + * acos(q); + * ``` + */ + template + friend Quat acos(const Quat &q); + + /** + * @brief return arccos value of this quaternion, arccos could be calculated as: + * \f[\arccos(q) = -\frac{\boldsymbol{v}}{||\boldsymbol{v}||}arccosh(q)\f] + * where \f$\boldsymbol{v} = [x, y, z].\f$ + * + * For example + * ``` + * Quatd q(1,2,3,4); + * q.acos(); + * ``` + */ + Quat<_Tp> acos() const; + + /** + * @brief return arctan value of quaternion q, arctan could be calculated as: + * \f[\arctan(q) = -\frac{\boldsymbol{v}}{||\boldsymbol{v}||}arctanh(q\frac{\boldsymbol{v}}{||\boldsymbol{v}||})\f] + * where \f$\boldsymbol{v} = [x, y, z].\f$ + * @param q a quaternion. + * + * For example + * ``` + * Quatd q(1,2,3,4); + * atan(q); + * ``` + */ + template + friend Quat atan(const Quat &q); + + /** + * @brief return arctan value of this quaternion, arctan could be calculated as: + * \f[\arctan(q) = -\frac{\boldsymbol{v}}{||\boldsymbol{v}||}arctanh(q\frac{\boldsymbol{v}}{||\boldsymbol{v}||})\f] + * where \f$\boldsymbol{v} = [x, y, z].\f$ + * + * For example + * ``` + * Quatd q(1,2,3,4); + * q.atan(); + * ``` + */ + Quat<_Tp> atan() const; + + /** + * @brief return arcsinh value of quaternion q, arcsinh could be calculated as: + * \f[arcsinh(q) = \ln(q + \sqrt{q^2 + 1})\f]. + * @param q a quaternion. + * + * For example + * ``` + * Quatd q(1,2,3,4); + * asinh(q); + * ``` + */ + template + friend Quat asinh(const Quat &q); + + /** + * @brief return arcsinh value of this quaternion, arcsinh could be calculated as: + * \f[arcsinh(q) = \ln(q + \sqrt{q^2 + 1})\f]. + * + * For example + * ``` + * Quatd q(1,2,3,4); + * q.asinh(); + * ``` + */ + Quat<_Tp> asinh() const; + + /** + * @brief return arccosh value of quaternion q, arccosh could be calculated as: + * \f[arccosh(q) = \ln(q + \sqrt{q^2 - 1})\f]. + * @param q a quaternion. + * + * For example + * ``` + * Quatd q(1,2,3,4); + * acosh(q); + * ``` + */ + template + friend Quat acosh(const Quat &q); + + /** + * @brief return arccosh value of this quaternion, arccosh could be calculated as: + * \f[arcosh(q) = \ln(q + \sqrt{q^2 - 1})\f]. + * + * For example + * ``` + * Quatd q(1,2,3,4); + * q.acosh(); + * ``` + */ + Quat<_Tp> acosh() const; + + /** + * @brief return arctanh value of quaternion q, arctanh could be calculated as: + * \f[arctanh(q) = \frac{\ln(q + 1) - \ln(1 - q)}{2}\f]. + * @param q a quaternion. + * + * For example + * ``` + * Quatd q(1,2,3,4); + * atanh(q); + * ``` + */ + template + friend Quat atanh(const Quat &q); + + /** + * @brief return arctanh value of this quaternion, arctanh could be calculated as: + * \f[arcsinh(q) = \frac{\ln(q + 1) - \ln(1 - q)}{2}\f]. + * + * For example + * ``` + * Quatd q(1,2,3,4); + * q.atanh(); + * ``` + */ + Quat<_Tp> atanh() const; + + /** + * @brief return true if this quaternion is a unit quaternion. + * @param eps tolerance scope of normalization. The eps could be defined as + * + * \f[eps = |1 - dotValue|\f] where \f[dotValue = (this.w^2 + this.x^2 + this,y^2 + this.z^2).\f] + * And this function will consider it is normalized when the dotValue over a range \f$[1-eps, 1+eps]\f$. + */ + bool isNormal(_Tp eps=CV_QUAT_EPS) const; + + /** + * @brief to throw an error if this quaternion is not a unit quaternion. + * @param eps tolerance scope of normalization. + * @sa isNormal + */ + void assertNormal(_Tp eps=CV_QUAT_EPS) const; + + /** + * @brief transform a quaternion to a 3x3 rotation matrix. + * @param assumeUnit if QUAT_ASSUME_UNIT, this quaternion assume to be a unit quaternion and + * this function will save some computations. Otherwise, this function will normalized this + * quaternion at first then to do the transformation. + * + * @note Matrix A which is to be rotated should have the form + * \f[\begin{bmatrix} + * x_0& x_1& x_2&...&x_n\\ + * y_0& y_1& y_2&...&y_n\\ + * z_0& z_1& z_2&...&z_n + * \end{bmatrix}\f] + * where the same subscript represents a point. The shape of A assume to be [3, n] + * The points matrix A can be rotated by toRotMat3x3() * A. + * The result has 3 rows and n columns too. + + * For example + * ``` + * double angle = CV_PI; + * Vec3d axis{0,0,1}; + * Quatd q_unit = Quatd::createFromAngleAxis(angle, axis); //quaternion could also be get by interpolation by two or more quaternions. + * + * //assume there is two points (1,0,0) and (1,0,1) to be rotated + * Mat pointsA = (Mat_(2, 3) << 1,0,0,1,0,1); + * //change the shape + * pointsA = pointsA.t(); + * // rotate 180 degrees around the z axis + * Mat new_point = q_unit.toRotMat3x3() * pointsA; + * // print two points + * cout << new_point << endl; + * ``` + */ + Matx<_Tp, 3, 3> toRotMat3x3(QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT) const; + + /** + * @brief transform a quaternion to a 4x4 rotation matrix. + * @param assumeUnit if QUAT_ASSUME_UNIT, this quaternion assume to be a unit quaternion and + * this function will save some computations. Otherwise, this function will normalized this + * quaternion at first then to do the transformation. + * + * The operations is similar as toRotMat3x3 + * except that the points matrix should have the form + * \f[\begin{bmatrix} + * x_0& x_1& x_2&...&x_n\\ + * y_0& y_1& y_2&...&y_n\\ + * z_0& z_1& z_2&...&z_n\\ + * 0&0&0&...&0 + * \end{bmatrix}\f] + * + * @sa toRotMat3x3 + */ + + Matx<_Tp, 4, 4> toRotMat4x4(QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT) const; + + /** + * @brief transform the this quaternion to a Vec. + * + * For example + * ``` + * Quatd q(1,2,3,4); + * q.toVec(); + * ``` + */ + Vec<_Tp, 4> toVec() const; + + /** + * @brief transform this quaternion to a Rotation vector. + * @param assumeUnit if QUAT_ASSUME_UNIT, this quaternion assume to be a unit quaternion and + * this function will save some computations. + * Rotation vector rVec is defined as: + * \f[ rVec = [\theta v_x, \theta v_y, \theta v_z]\f] + * where \f$\theta\f$ represents rotation angle, and \f$\boldsymbol{v}\f$ represents the normalized rotation axis. + * + * For example + * ``` + * Quatd q(1,2,3,4); + * q.toRotVec(); + * + * QuatAssumeType assumeUnit = QUAT_ASSUME_UNIT; + * q.normalize().toRotVec(assumeUnit); //answer is same as q.toRotVec(). + * ``` + */ + Vec<_Tp, 3> toRotVec(QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT) const; + + /** + * @brief get the angle of quaternion, it returns the rotation angle. + * @param assumeUnit if QUAT_ASSUME_UNIT, this quaternion assume to be a unit quaternion and + * this function will save some computations. + * \f[\psi = 2 *arccos(\frac{w}{||q||})\f] + * + * For example + * ``` + * Quatd q(1,2,3,4); + * q.getAngle(); + * + * QuatAssumeType assumeUnit = QUAT_ASSUME_UNIT; + * q.normalize().getAngle(assumeUnit);//same as q.getAngle(). + * ``` + * @note It always return the value between \f$[0, 2\pi]\f$. + */ + _Tp getAngle(QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT) const; + + /** + * @brief get the axis of quaternion, it returns a vector of length 3. + * @param assumeUnit if QUAT_ASSUME_UNIT, this quaternion assume to be a unit quaternion and + * this function will save some computations. + * + * the unit axis \f$\boldsymbol{u}\f$ is defined by + * \f[\begin{equation} + * \begin{split} + * \boldsymbol{v} + * &= \boldsymbol{u} ||\boldsymbol{v}||\\ + * &= \boldsymbol{u}||q||sin(\frac{\theta}{2}) + * \end{split} + * \end{equation}\f] + * where \f$v=[x, y ,z]\f$ and \f$\theta\f$ represents rotation angle. + * + * + * For example + * ``` + * Quatd q(1,2,3,4); + * q.getAxis(); + * + * QuatAssumeType assumeUnit = QUAT_ASSUME_UNIT; + * q.normalize().getAxis(assumeUnit);//same as q.getAxis() + * ``` + */ + Vec<_Tp, 3> getAxis(QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT) const; + + /** + * @brief return the dot between quaternion \f$q\f$ and this quaternion. + * + * dot(p, q) is a good metric of how close the quaternions are. + * Indeed, consider the unit quaternion difference \f$p^{-1} * q\f$, its real part is dot(p, q). + * At the same time its real part is equal to \f$\cos(\beta/2)\f$ where \f$\beta\f$ is + * an angle of rotation between p and q, i.e., + * Therefore, the closer dot(p, q) to 1, + * the smaller rotation between them. + * \f[p \cdot q = p.w \cdot q.w + p.x \cdot q.x + p.y \cdot q.y + p.z \cdot q.z\f] + * @param q the other quaternion. + * + * For example + * ``` + * Quatd q(1,2,3,4); + * Quatd p(5,6,7,8); + * p.dot(q); + * ``` + */ + _Tp dot(Quat<_Tp> q) const; + + /** + * @brief To calculate the interpolation from \f$q_0\f$ to \f$q_1\f$ by Linear Interpolation(Nlerp) + * For two quaternions, this interpolation curve can be displayed as: + * \f[Lerp(q_0, q_1, t) = (1 - t)q_0 + tq_1.\f] + * Obviously, the lerp will interpolate along a straight line if we think of \f$q_0\f$ and \f$q_1\f$ as a vector + * in a two-dimensional space. When \f$t = 0\f$, it returns \f$q_0\f$ and when \f$t= 1\f$, it returns \f$q_1\f$. + * \f$t\f$ should to be ranged in \f$[0, 1]\f$ normally. + * @param q0 a quaternion used in linear interpolation. + * @param q1 a quaternion used in linear interpolation. + * @param t percent of vector \f$\overrightarrow{q_0q_1}\f$ over a range [0, 1]. + * @note it returns a non-unit quaternion. + */ + static Quat<_Tp> lerp(const Quat<_Tp> &q0, const Quat &q1, const _Tp t); + + /** + * @brief To calculate the interpolation from \f$q_0\f$ to \f$q_1\f$ by Normalized Linear Interpolation(Nlerp). + * it returns a normalized quaternion of Linear Interpolation(Lerp). + * \f[ Nlerp(q_0, q_1, t) = \frac{(1 - t)q_0 + tq_1}{||(1 - t)q_0 + tq_1||}.\f] + * The interpolation will always choose the shortest path but the constant speed is not guaranteed. + * @param q0 a quaternion used in normalized linear interpolation. + * @param q1 a quaternion used in normalized linear interpolation. + * @param t percent of vector \f$\overrightarrow{q_0q_1}\f$ over a range [0, 1]. + * @param assumeUnit if QUAT_ASSUME_UNIT, all input quaternions assume to be unit quaternion. Otherwise, all inputs + quaternion will be normalized inside the function. + * @sa lerp + */ + static Quat<_Tp> nlerp(const Quat<_Tp> &q0, const Quat &q1, const _Tp t, QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT); + + /** + @brief To calculate the interpolation between \f$q_0\f$ and \f$q_1\f$ by Spherical Linear + Interpolation(Slerp), which can be defined as: + \f[ Slerp(q_0, q_1, t) = \frac{\sin((1-t)\theta)}{\sin(\theta)}q_0 + \frac{\sin(t\theta)}{\sin(\theta)}q_1\f] + where \f$\theta\f$ can be calculated as: + \f[\theta=cos^{-1}(q_0\cdot q_1)\f] + resulting from the both of their norm is unit. + @param q0 a quaternion used in Slerp. + @param q1 a quaternion used in Slerp. + @param t percent of angle between \f$q_0\f$ and \f$q_1\f$ over a range [0, 1]. + @param assumeUnit if QUAT_ASSUME_UNIT, all input quaternions assume to be unit quaternions. Otherwise, all input + quaternions will be normalized inside the function. + @param directChange if QUAT_ASSUME_UNIT, the interpolation will choose the nearest path. + @note If the interpolation angle is small, the error between Nlerp and Slerp is not so large. To improve efficiency and + avoid zero division error, we use Nlerp instead of Slerp. + */ + static Quat<_Tp> slerp(const Quat<_Tp> &q0, const Quat &q1, const _Tp t, QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT, bool directChange=true); + + /** + * @brief To calculate the interpolation between \f$q_0\f$,\f$q_1\f$,\f$q_2\f$,\f$q_3\f$ by Spherical and quadrangle(Squad). This could be defined as: + * \f[Squad(q_i, s_i, s_{i+1}, q_{i+1}, t) = Slerp(Slerp(q_i, q_{i+1}, t), Slerp(s_i, s_{i+1}, t), 2t(1-t))\f] + * where + * \f[s_i = q_i\exp(-\frac{\log(q^*_iq_{i+1}) + \log(q^*_iq_{i-1})}{4})\f] + * + * The Squad expression is analogous to the \f$B\acute{e}zier\f$ curve, but involves spherical linear + * interpolation instead of simple linear interpolation. Each \f$s_i\f$ needs to be calculated by three + * quaternions. + * + * @param q0 the first quaternion. + * @param s0 the second quaternion. + * @param s1 the third quaternion. + * @param q1 thr fourth quaternion. + * @param t interpolation parameter of quadratic and linear interpolation over a range \f$[0, 1]\f$. + * @param assumeUnit if QUAT_ASSUME_UNIT, all input quaternions assume to be unit quaternion. Otherwise, all input + * quaternions will be normalized inside the function. + * @param directChange if QUAT_ASSUME_UNIT, squad will find the nearest path to interpolate. + * @sa interPoint, spline + */ + static Quat<_Tp> squad(const Quat<_Tp> &q0, const Quat<_Tp> &s0, + const Quat<_Tp> &s1, const Quat<_Tp> &q1, + const _Tp t, QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT, + bool directChange=true); + + /** + * @brief This is the part calculation of squad. + * To calculate the intermedia quaternion \f$s_i\f$ between each three quaternion + * \f[s_i = q_i\exp(-\frac{\log(q^*_iq_{i+1}) + \log(q^*_iq_{i-1})}{4}).\f] + * @param q0 the first quaternion. + * @param q1 the second quaternion. + * @param q2 the third quaternion. + * @param assumeUnit if QUAT_ASSUME_UNIT, all input quaternions assume to be unit quaternion. Otherwise, all input + * quaternions will be normalized inside the function. + * @sa squad + */ + static Quat<_Tp> interPoint(const Quat<_Tp> &q0, const Quat<_Tp> &q1, + const Quat<_Tp> &q2, QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT); + + /** + * @brief to calculate a quaternion which is the result of a \f$C^1\f$ continuous + * spline curve constructed by squad at the ratio t. Here, the interpolation values are + * between \f$q_1\f$ and \f$q_2\f$. \f$q_0\f$ and \f$q_2\f$ are used to ensure the \f$C^1\f$ + * continuity. if t = 0, it returns \f$q_1\f$, if t = 1, it returns \f$q_2\f$. + * @param q0 the first input quaternion to ensure \f$C^1\f$ continuity. + * @param q1 the second input quaternion. + * @param q2 the third input quaternion. + * @param q3 the fourth input quaternion the same use of \f$q1\f$. + * @param t ratio over a range [0, 1]. + * @param assumeUnit if QUAT_ASSUME_UNIT, \f$q_0, q_1, q_2, q_3\f$ assume to be unit quaternion. Otherwise, all input + * quaternions will be normalized inside the function. + * + * For example: + * + * If there are three double quaternions \f$v_0, v_1, v_2\f$ waiting to be interpolated. + * + * Interpolation between \f$v_0\f$ and \f$v_1\f$ with a ratio \f$t_0\f$ could be calculated as + * ``` + * Quatd::spline(v0, v0, v1, v2, t0); + * ``` + * Interpolation between \f$v_1\f$ and \f$v_2\f$ with a ratio \f$t_0\f$ could be calculated as + * ``` + * Quatd::spline(v0, v1, v2, v2, t0); + * ``` + * @sa squad, slerp + */ + static Quat<_Tp> spline(const Quat<_Tp> &q0, const Quat<_Tp> &q1, + const Quat<_Tp> &q2, const Quat<_Tp> &q3, + const _Tp t, QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT); + + /** + * @brief Return opposite quaternion \f$-p\f$ + * which satisfies \f$p + (-p) = 0.\f$ + * + * For example + * ``` + * Quatd q{1, 2, 3, 4}; + * std::cout << -q << std::endl; // [-1, -2, -3, -4] + * ``` + */ + Quat<_Tp> operator-() const; + + /** + * @brief return true if two quaternions p and q are nearly equal, i.e. when the absolute + * value of each \f$p_i\f$ and \f$q_i\f$ is less than CV_QUAT_EPS. + */ + bool operator==(const Quat<_Tp>&) const; + + /** + * @brief Addition operator of two quaternions p and q. + * It returns a new quaternion that each value is the sum of \f$p_i\f$ and \f$q_i\f$. + * + * For example + * ``` + * Quatd p{1, 2, 3, 4}; + * Quatd q{5, 6, 7, 8}; + * std::cout << p + q << std::endl; //[6, 8, 10, 12] + * ``` + */ + Quat<_Tp> operator+(const Quat<_Tp>&) const; + + /** + * @brief Addition assignment operator of two quaternions p and q. + * It adds right operand to the left operand and assign the result to left operand. + * + * For example + * ``` + * Quatd p{1, 2, 3, 4}; + * Quatd q{5, 6, 7, 8}; + * p += q; // equivalent to p = p + q + * std::cout << p << std::endl; //[6, 8, 10, 12] + * + * ``` + */ + Quat<_Tp>& operator+=(const Quat<_Tp>&); + + /** + * @brief Subtraction operator of two quaternions p and q. + * It returns a new quaternion that each value is the sum of \f$p_i\f$ and \f$-q_i\f$. + * + * For example + * ``` + * Quatd p{1, 2, 3, 4}; + * Quatd q{5, 6, 7, 8}; + * std::cout << p - q << std::endl; //[-4, -4, -4, -4] + * ``` + */ + Quat<_Tp> operator-(const Quat<_Tp>&) const; + + /** + * @brief Subtraction assignment operator of two quaternions p and q. + * It subtracts right operand from the left operand and assign the result to left operand. + * + * For example + * ``` + * Quatd p{1, 2, 3, 4}; + * Quatd q{5, 6, 7, 8}; + * p -= q; // equivalent to p = p - q + * std::cout << p << std::endl; //[-4, -4, -4, -4] + * + * ``` + */ + Quat<_Tp>& operator-=(const Quat<_Tp>&); + + /** + * @brief Multiplication assignment operator of two quaternions q and p. + * It multiplies right operand with the left operand and assign the result to left operand. + * + * Rule of quaternion multiplication: + * \f[ + * \begin{equation} + * \begin{split} + * p * q &= [p_0, \boldsymbol{u}]*[q_0, \boldsymbol{v}]\\ + * &=[p_0q_0 - \boldsymbol{u}\cdot \boldsymbol{v}, p_0\boldsymbol{v} + q_0\boldsymbol{u}+ \boldsymbol{u}\times \boldsymbol{v}]. + * \end{split} + * \end{equation} + * \f] + * where \f$\cdot\f$ means dot product and \f$\times \f$ means cross product. + * + * For example + * ``` + * Quatd p{1, 2, 3, 4}; + * Quatd q{5, 6, 7, 8}; + * p *= q; // equivalent to p = p * q + * std::cout << p << std::endl; //[-60, 12, 30, 24] + * ``` + */ + Quat<_Tp>& operator*=(const Quat<_Tp>&); + + /** + * @brief Multiplication assignment operator of a quaternions and a scalar. + * It multiplies right operand with the left operand and assign the result to left operand. + * + * Rule of quaternion multiplication with a scalar: + * \f[ + * \begin{equation} + * \begin{split} + * p * s &= [w, x, y, z] * s\\ + * &=[w * s, x * s, y * s, z * s]. + * \end{split} + * \end{equation} + * \f] + * + * For example + * ``` + * Quatd p{1, 2, 3, 4}; + * double s = 2.0; + * p *= s; // equivalent to p = p * s + * std::cout << p << std::endl; //[2.0, 4.0, 6.0, 8.0] + * ``` + * @note the type of scalar should be equal to the quaternion. + */ + Quat<_Tp>& operator*=(const _Tp s); + + /** + * @brief Multiplication operator of two quaternions q and p. + * Multiplies values on either side of the operator. + * + * Rule of quaternion multiplication: + * \f[ + * \begin{equation} + * \begin{split} + * p * q &= [p_0, \boldsymbol{u}]*[q_0, \boldsymbol{v}]\\ + * &=[p_0q_0 - \boldsymbol{u}\cdot \boldsymbol{v}, p_0\boldsymbol{v} + q_0\boldsymbol{u}+ \boldsymbol{u}\times \boldsymbol{v}]. + * \end{split} + * \end{equation} + * \f] + * where \f$\cdot\f$ means dot product and \f$\times \f$ means cross product. + * + * For example + * ``` + * Quatd p{1, 2, 3, 4}; + * Quatd q{5, 6, 7, 8}; + * std::cout << p * q << std::endl; //[-60, 12, 30, 24] + * ``` + */ + Quat<_Tp> operator*(const Quat<_Tp>&) const; + + /** + * @brief Division operator of a quaternions and a scalar. + * It divides left operand with the right operand and assign the result to left operand. + * + * Rule of quaternion division with a scalar: + * \f[ + * \begin{equation} + * \begin{split} + * p / s &= [w, x, y, z] / s\\ + * &=[w/s, x/s, y/s, z/s]. + * \end{split} + * \end{equation} + * \f] + * + * For example + * ``` + * Quatd p{1, 2, 3, 4}; + * double s = 2.0; + * p /= s; // equivalent to p = p / s + * std::cout << p << std::endl; //[0.5, 1, 1.5, 2] + * ``` + * @note the type of scalar should be equal to this quaternion. + */ + Quat<_Tp> operator/(const _Tp s) const; + + /** + * @brief Division operator of two quaternions p and q. + * Divides left hand operand by right hand operand. + * + * Rule of quaternion division with a scalar: + * \f[ + * \begin{equation} + * \begin{split} + * p / q &= p * q.inv()\\ + * \end{split} + * \end{equation} + * \f] + * + * For example + * ``` + * Quatd p{1, 2, 3, 4}; + * Quatd q{5, 6, 7, 8}; + * std::cout << p / q << std::endl; // equivalent to p * q.inv() + * ``` + */ + Quat<_Tp> operator/(const Quat<_Tp>&) const; + + /** + * @brief Division assignment operator of a quaternions and a scalar. + * It divides left operand with the right operand and assign the result to left operand. + * + * Rule of quaternion division with a scalar: + * \f[ + * \begin{equation} + * \begin{split} + * p / s &= [w, x, y, z] / s\\ + * &=[w / s, x / s, y / s, z / s]. + * \end{split} + * \end{equation} + * \f] + * + * For example + * ``` + * Quatd p{1, 2, 3, 4}; + * double s = 2.0;; + * p /= s; // equivalent to p = p / s + * std::cout << p << std::endl; //[0.5, 1.0, 1.5, 2.0] + * ``` + * @note the type of scalar should be equal to the quaternion. + */ + Quat<_Tp>& operator/=(const _Tp s); + + /** + * @brief Division assignment operator of two quaternions p and q; + * It divides left operand with the right operand and assign the result to left operand. + * + * Rule of quaternion division with a quaternion: + * \f[ + * \begin{equation} + * \begin{split} + * p / q&= p * q.inv()\\ + * \end{split} + * \end{equation} + * \f] + * + * For example + * ``` + * Quatd p{1, 2, 3, 4}; + * Quatd q{5, 6, 7, 8}; + * p /= q; // equivalent to p = p * q.inv() + * std::cout << p << std::endl; + * ``` + */ + Quat<_Tp>& operator/=(const Quat<_Tp>&); + + _Tp& operator[](std::size_t n); + + const _Tp& operator[](std::size_t n) const; + + /** + * @brief Subtraction operator of a scalar and a quaternions. + * Subtracts right hand operand from left hand operand. + * + * For example + * ``` + * Quatd p{1, 2, 3, 4}; + * double scalar = 2.0; + * std::cout << scalar - p << std::endl; //[1.0, -2, -3, -4] + * ``` + * @note the type of scalar should be equal to the quaternion. + */ + template + friend Quat cv::operator-(const T s, const Quat&); + + /** + * @brief Subtraction operator of a quaternions and a scalar. + * Subtracts right hand operand from left hand operand. + * + * For example + * ``` + * Quatd p{1, 2, 3, 4}; + * double scalar = 2.0; + * std::cout << p - scalar << std::endl; //[-1.0, 2, 3, 4] + * ``` + * @note the type of scalar should be equal to the quaternion. + */ + template + friend Quat cv::operator-(const Quat&, const T s); + + /** + * @brief Addition operator of a quaternions and a scalar. + * Adds right hand operand from left hand operand. + * + * For example + * ``` + * Quatd p{1, 2, 3, 4}; + * double scalar = 2.0; + * std::cout << scalar + p << std::endl; //[3.0, 2, 3, 4] + * ``` + * @note the type of scalar should be equal to the quaternion. + */ + template + friend Quat cv::operator+(const T s, const Quat&); + + /** + * @brief Addition operator of a quaternions and a scalar. + * Adds right hand operand from left hand operand. + * + * For example + * ``` + * Quatd p{1, 2, 3, 4}; + * double scalar = 2.0; + * std::cout << p + scalar << std::endl; //[3.0, 2, 3, 4] + * ``` + * @note the type of scalar should be equal to the quaternion. + */ + template + friend Quat cv::operator+(const Quat&, const T s); + + /** + * @brief Multiplication operator of a scalar and a quaternions. + * It multiplies right operand with the left operand and assign the result to left operand. + * + * Rule of quaternion multiplication with a scalar: + * \f[ + * \begin{equation} + * \begin{split} + * p * s &= [w, x, y, z] * s\\ + * &=[w * s, x * s, y * s, z * s]. + * \end{split} + * \end{equation} + * \f] + * + * For example + * ``` + * Quatd p{1, 2, 3, 4}; + * double s = 2.0; + * std::cout << s * p << std::endl; //[2.0, 4.0, 6.0, 8.0] + * ``` + * @note the type of scalar should be equal to the quaternion. + */ + template + friend Quat cv::operator*(const T s, const Quat&); + + /** + * @brief Multiplication operator of a quaternion and a scalar. + * It multiplies right operand with the left operand and assign the result to left operand. + * + * Rule of quaternion multiplication with a scalar: + * \f[ + * \begin{equation} + * \begin{split} + * p * s &= [w, x, y, z] * s\\ + * &=[w * s, x * s, y * s, z * s]. + * \end{split} + * \end{equation} + * \f] + * + * For example + * ``` + * Quatd p{1, 2, 3, 4}; + * double s = 2.0; + * std::cout << p * s << std::endl; //[2.0, 4.0, 6.0, 8.0] + * ``` + * @note the type of scalar should be equal to the quaternion. + */ + template + friend Quat cv::operator*(const Quat&, const T s); + + template + friend std::ostream& cv::operator<<(std::ostream&, const Quat&); + + _Tp w, x, y, z; + +}; + +template +Quat inv(const Quat &q, QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT); + +template +Quat sinh(const Quat &q); + +template +Quat cosh(const Quat &q); + +template +Quat tanh(const Quat &q); + +template +Quat sin(const Quat &q); + +template +Quat cos(const Quat &q); + +template +Quat tan(const Quat &q); + +template +Quat asinh(const Quat &q); + +template +Quat acosh(const Quat &q); + +template +Quat atanh(const Quat &q); + +template +Quat asin(const Quat &q); + +template +Quat acos(const Quat &q); + +template +Quat atan(const Quat &q); + +template +Quat power(const Quat &q, const Quat &p, QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT); + +template +Quat exp(const Quat &q); + +template +Quat log(const Quat &q, QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT); + +template +Quat power(const Quat& q, const T x, QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT); + +template +Quat crossProduct(const Quat &p, const Quat &q); + +template +Quat sqrt(const Quat &q, QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT); + +template +Quat operator*(const T, const Quat&); + +template +Quat operator*(const Quat&, const T); + +template +std::ostream& operator<<(std::ostream&, const Quat&); + +using Quatd = Quat; +using Quatf = Quat; + +//! @} core +} + +#include "opencv2/core/quaternion.inl.hpp" + +#endif /* OPENCV_CORE_QUATERNION_HPP */ diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/quaternion.inl.hpp b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/quaternion.inl.hpp new file mode 100644 index 0000000..f665dbe --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/quaternion.inl.hpp @@ -0,0 +1,872 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2020, Huawei Technologies Co., Ltd. All rights reserved. +// Third party copyrights are property of their respective owners. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Author: Liangqian Kong +// Longbu Wang + +#ifndef OPENCV_CORE_QUATERNION_INL_HPP +#define OPENCV_CORE_QUATERNION_INL_HPP + +#ifndef OPENCV_CORE_QUATERNION_HPP +#erorr This is not a standalone header. Include quaternion.hpp instead. +#endif + +//@cond IGNORE +/////////////////////////////////////////////////////////////////////////////////////// +//Implementation +namespace cv { + +template +Quat::Quat() : w(0), x(0), y(0), z(0) {} + +template +Quat::Quat(const Vec &coeff):w(coeff[0]), x(coeff[1]), y(coeff[2]), z(coeff[3]){} + +template +Quat::Quat(const T qw, const T qx, const T qy, const T qz):w(qw), x(qx), y(qy), z(qz){} + +template +Quat Quat::createFromAngleAxis(const T angle, const Vec &axis) +{ + T w, x, y, z; + T vNorm = std::sqrt(axis.dot(axis)); + if (vNorm < CV_QUAT_EPS) + { + CV_Error(Error::StsBadArg, "this quaternion does not represent a rotation"); + } + const T angle_half = angle * 0.5; + w = std::cos(angle_half); + const T sin_v = std::sin(angle_half); + const T sin_norm = sin_v / vNorm; + x = sin_norm * axis[0]; + y = sin_norm * axis[1]; + z = sin_norm * axis[2]; + return Quat(w, x, y, z); +} + +template +Quat Quat::createFromRotMat(InputArray _R) +{ + CV_CheckTypeEQ(_R.type(), cv::traits::Type::value, ""); + if (_R.rows() != 3 || _R.cols() != 3) + { + CV_Error(Error::StsBadArg, "Cannot convert matrix to quaternion: rotation matrix should be a 3x3 matrix"); + } + Matx R; + _R.copyTo(R); + + T S, w, x, y, z; + T trace = R(0, 0) + R(1, 1) + R(2, 2); + if (trace > 0) + { + S = std::sqrt(trace + 1) * 2; + x = (R(1, 2) - R(2, 1)) / S; + y = (R(2, 0) - R(0, 2)) / S; + z = (R(0, 1) - R(1, 0)) / S; + w = -0.25 * S; + } + else if (R(0, 0) > R(1, 1) && R(0, 0) > R(2, 2)) + { + + S = std::sqrt(1.0 + R(0, 0) - R(1, 1) - R(2, 2)) * 2; + x = -0.25 * S; + y = -(R(1, 0) + R(0, 1)) / S; + z = -(R(0, 2) + R(2, 0)) / S; + w = (R(1, 2) - R(2, 1)) / S; + } + else if (R(1, 1) > R(2, 2)) + { + S = std::sqrt(1.0 - R(0, 0) + R(1, 1) - R(2, 2)) * 2; + x = (R(0, 1) + R(1, 0)) / S; + y = 0.25 * S; + z = (R(1, 2) + R(2, 1)) / S; + w = (R(0, 2) - R(2, 0)) / S; + } + else + { + S = std::sqrt(1.0 - R(0, 0) - R(1, 1) + R(2, 2)) * 2; + x = (R(0, 2) + R(2, 0)) / S; + y = (R(1, 2) + R(2, 1)) / S; + z = 0.25 * S; + w = -(R(0, 1) - R(1, 0)) / S; + } + return Quat (w, x, y, z); +} + +template +Quat Quat::createFromRvec(InputArray _rvec) +{ + if (!((_rvec.cols() == 1 && _rvec.rows() == 3) || (_rvec.cols() == 3 && _rvec.rows() == 1))) { + CV_Error(Error::StsBadArg, "Cannot convert rotation vector to quaternion: The length of rotation vector should be 3"); + } + Vec rvec; + _rvec.copyTo(rvec); + T psi = std::sqrt(rvec.dot(rvec)); + if (abs(psi) < CV_QUAT_EPS) { + return Quat (1, 0, 0, 0); + } + Vec axis = rvec / psi; + return createFromAngleAxis(psi, axis); +} + +template +inline Quat Quat::operator-() const +{ + return Quat(-w, -x, -y, -z); +} + + +template +inline bool Quat::operator==(const Quat &q) const +{ + return (abs(w - q.w) < CV_QUAT_EPS && abs(x - q.x) < CV_QUAT_EPS && abs(y - q.y) < CV_QUAT_EPS && abs(z - q.z) < CV_QUAT_EPS); +} + +template +inline Quat Quat::operator+(const Quat &q1) const +{ + return Quat(w + q1.w, x + q1.x, y + q1.y, z + q1.z); +} + +template +inline Quat operator+(const T a, const Quat& q) +{ + return Quat(q.w + a, q.x, q.y, q.z); +} + +template +inline Quat operator+(const Quat& q, const T a) +{ + return Quat(q.w + a, q.x, q.y, q.z); +} + +template +inline Quat operator-(const T a, const Quat& q) +{ + return Quat(a - q.w, -q.x, -q.y, -q.z); +} + +template +inline Quat operator-(const Quat& q, const T a) +{ + return Quat(q.w - a, q.x, q.y, q.z); +} + +template +inline Quat Quat::operator-(const Quat &q1) const +{ + return Quat(w - q1.w, x - q1.x, y - q1.y, z - q1.z); +} + +template +inline Quat& Quat::operator+=(const Quat &q1) +{ + w += q1.w; + x += q1.x; + y += q1.y; + z += q1.z; + return *this; +} + +template +inline Quat& Quat::operator-=(const Quat &q1) +{ + w -= q1.w; + x -= q1.x; + y -= q1.y; + z -= q1.z; + return *this; +} + +template +inline Quat Quat::operator*(const Quat &q1) const +{ + Vec q{w, x, y, z}; + Vec q2{q1.w, q1.x, q1.y, q1.z}; + return Quat(q * q2); +} + + +template +Quat operator*(const Quat &q1, const T a) +{ + return Quat(a * q1.w, a * q1.x, a * q1.y, a * q1.z); +} + +template +Quat operator*(const T a, const Quat &q1) +{ + return Quat(a * q1.w, a * q1.x, a * q1.y, a * q1.z); +} + +template +inline Quat& Quat::operator*=(const Quat &q1) +{ + T qw, qx, qy, qz; + qw = w * q1.w - x * q1.x - y * q1.y - z * q1.z; + qx = x * q1.w + w * q1.x + y * q1.z - z * q1.y; + qy = y * q1.w + w * q1.y + z * q1.x - x * q1.z; + qz = z * q1.w + w * q1.z + x * q1.y - y * q1.x; + w = qw; + x = qx; + y = qy; + z = qz; + return *this; +} + +template +inline Quat& Quat::operator/=(const Quat &q1) +{ + Quat q(*this * q1.inv()); + w = q.w; + x = q.x; + y = q.y; + z = q.z; + return *this; +} +template +Quat& Quat::operator*=(const T q1) +{ + w *= q1; + x *= q1; + y *= q1; + z *= q1; + return *this; +} + +template +inline Quat& Quat::operator/=(const T a) +{ + const T a_inv = 1.0 / a; + w *= a_inv; + x *= a_inv; + y *= a_inv; + z *= a_inv; + return *this; +} + +template +inline Quat Quat::operator/(const T a) const +{ + const T a_inv = 1.0 / a; + return Quat(w * a_inv, x * a_inv, y * a_inv, z * a_inv); +} + +template +inline Quat Quat::operator/(const Quat &q) const +{ + return *this * q.inv(); +} + +template +inline const T& Quat::operator[](std::size_t n) const +{ + switch (n) { + case 0: + return w; + case 1: + return x; + case 2: + return y; + case 3: + return z; + default: + CV_Error(Error::StsOutOfRange, "subscript exceeds the index range"); + } +} + +template +inline T& Quat::operator[](std::size_t n) +{ + switch (n) { + case 0: + return w; + case 1: + return x; + case 2: + return y; + case 3: + return z; + default: + CV_Error(Error::StsOutOfRange, "subscript exceeds the index range"); + } +} + +template +std::ostream & operator<<(std::ostream &os, const Quat &q) +{ + os << "Quat " << Vec{q.w, q.x, q.y, q.z}; + return os; +} + +template +inline T Quat::at(size_t index) const +{ + return (*this)[index]; +} + +template +inline Quat Quat::conjugate() const +{ + return Quat(w, -x, -y, -z); +} + +template +inline T Quat::norm() const +{ + return std::sqrt(dot(*this)); +} + +template +Quat exp(const Quat &q) +{ + return q.exp(); +} + +template +Quat Quat::exp() const +{ + Vec v{x, y, z}; + T normV = std::sqrt(v.dot(v)); + T k = normV < CV_QUAT_EPS ? 1 : std::sin(normV) / normV; + return std::exp(w) * Quat(std::cos(normV), v[0] * k, v[1] * k, v[2] * k); +} + +template +Quat log(const Quat &q, QuatAssumeType assumeUnit) +{ + return q.log(assumeUnit); +} + +template +Quat Quat::log(QuatAssumeType assumeUnit) const +{ + Vec v{x, y, z}; + T vNorm = std::sqrt(v.dot(v)); + if (assumeUnit) + { + T k = vNorm < CV_QUAT_EPS ? 1 : std::acos(w) / vNorm; + return Quat(0, v[0] * k, v[1] * k, v[2] * k); + } + T qNorm = norm(); + if (qNorm < CV_QUAT_EPS) + { + CV_Error(Error::StsBadArg, "Cannot apply this quaternion to log function: undefined"); + } + T k = vNorm < CV_QUAT_EPS ? 1 : std::acos(w / qNorm) / vNorm; + return Quat(std::log(qNorm), v[0] * k, v[1] * k, v[2] *k); +} + +template +inline Quat power(const Quat &q1, const T alpha, QuatAssumeType assumeUnit) +{ + return q1.power(alpha, assumeUnit); +} + +template +inline Quat Quat::power(const T alpha, QuatAssumeType assumeUnit) const +{ + if (x * x + y * y + z * z > CV_QUAT_EPS) + { + T angle = getAngle(assumeUnit); + Vec axis = getAxis(assumeUnit); + if (assumeUnit) + { + return createFromAngleAxis(alpha * angle, axis); + } + return std::pow(norm(), alpha) * createFromAngleAxis(alpha * angle, axis); + } + else + { + return std::pow(norm(), alpha) * Quat(w, x, y, z); + } +} + + +template +inline Quat sqrt(const Quat &q, QuatAssumeType assumeUnit) +{ + return q.sqrt(assumeUnit); +} + +template +inline Quat Quat::sqrt(QuatAssumeType assumeUnit) const +{ + return power(0.5, assumeUnit); +} + + +template +inline Quat power(const Quat &p, const Quat &q, QuatAssumeType assumeUnit) +{ + return p.power(q, assumeUnit); +} + + +template +inline Quat Quat::power(const Quat &q, QuatAssumeType assumeUnit) const +{ + return cv::exp(q * log(assumeUnit)); +} + +template +inline T Quat::dot(Quat q1) const +{ + return w * q1.w + x * q1.x + y * q1.y + z * q1.z; +} + + +template +inline Quat crossProduct(const Quat &p, const Quat &q) +{ + return p.crossProduct(q); +} + + +template +inline Quat Quat::crossProduct(const Quat &q) const +{ + return Quat (0, y * q.z - z * q.y, z * q.x - x * q.z, x * q.y - q.x * y); +} + +template +inline Quat Quat::normalize() const +{ + T normVal = norm(); + if (normVal < CV_QUAT_EPS) + { + CV_Error(Error::StsBadArg, "Cannot normalize this quaternion: the norm is too small."); + } + return Quat(w / normVal, x / normVal, y / normVal, z / normVal) ; +} + +template +inline Quat inv(const Quat &q, QuatAssumeType assumeUnit) +{ + return q.inv(assumeUnit); +} + + +template +inline Quat Quat::inv(QuatAssumeType assumeUnit) const +{ + if (assumeUnit) + { + return conjugate(); + } + T norm2 = dot(*this); + if (norm2 < CV_QUAT_EPS) + { + CV_Error(Error::StsBadArg, "This quaternion do not have inverse quaternion"); + } + return conjugate() / norm2; +} + +template +inline Quat sinh(const Quat &q) +{ + return q.sinh(); +} + + +template +inline Quat Quat::sinh() const +{ + Vec v{x, y ,z}; + T vNorm = std::sqrt(v.dot(v)); + T k = vNorm < CV_QUAT_EPS ? 1 : std::cosh(w) * std::sin(vNorm) / vNorm; + return Quat(std::sinh(w) * std::cos(vNorm), v[0] * k, v[1] * k, v[2] * k); +} + + +template +inline Quat cosh(const Quat &q) +{ + return q.cosh(); +} + + +template +inline Quat Quat::cosh() const +{ + Vec v{x, y ,z}; + T vNorm = std::sqrt(v.dot(v)); + T k = vNorm < CV_QUAT_EPS ? 1 : std::sinh(w) * std::sin(vNorm) / vNorm; + return Quat(std::cosh(w) * std::cos(vNorm), v[0] * k, v[1] * k, v[2] * k); +} + +template +inline Quat tanh(const Quat &q) +{ + return q.tanh(); +} + +template +inline Quat Quat::tanh() const +{ + return sinh() * cosh().inv(); +} + + +template +inline Quat sin(const Quat &q) +{ + return q.sin(); +} + + +template +inline Quat Quat::sin() const +{ + Vec v{x, y ,z}; + T vNorm = std::sqrt(v.dot(v)); + T k = vNorm < CV_QUAT_EPS ? 1 : std::cos(w) * std::sinh(vNorm) / vNorm; + return Quat(std::sin(w) * std::cosh(vNorm), v[0] * k, v[1] * k, v[2] * k); +} + +template +inline Quat cos(const Quat &q) +{ + return q.cos(); +} + +template +inline Quat Quat::cos() const +{ + Vec v{x, y ,z}; + T vNorm = std::sqrt(v.dot(v)); + T k = vNorm < CV_QUAT_EPS ? 1 : std::sin(w) * std::sinh(vNorm) / vNorm; + return Quat(std::cos(w) * std::cosh(vNorm), -v[0] * k, -v[1] * k, -v[2] * k); +} + +template +inline Quat tan(const Quat &q) +{ + return q.tan(); +} + +template +inline Quat Quat::tan() const +{ + return sin() * cos().inv(); +} + +template +inline Quat asinh(const Quat &q) +{ + return q.asinh(); +} + +template +inline Quat Quat::asinh() const +{ + return cv::log(*this + cv::power(*this * *this + Quat(1, 0, 0, 0), 0.5)); +} + +template +inline Quat acosh(const Quat &q) +{ + return q.acosh(); +} + +template +inline Quat Quat::acosh() const +{ + return cv::log(*this + cv::power(*this * *this - Quat(1,0,0,0), 0.5)); +} + +template +inline Quat atanh(const Quat &q) +{ + return q.atanh(); +} + +template +inline Quat Quat::atanh() const +{ + Quat ident(1, 0, 0, 0); + Quat c1 = (ident + *this).log(); + Quat c2 = (ident - *this).log(); + return 0.5 * (c1 - c2); +} + +template +inline Quat asin(const Quat &q) +{ + return q.asin(); +} + +template +inline Quat Quat::asin() const +{ + Quat v(0, x, y, z); + T vNorm = v.norm(); + T k = vNorm < CV_QUAT_EPS ? 1 : vNorm; + return -v / k * (*this * v / k).asinh(); +} + +template +inline Quat acos(const Quat &q) +{ + return q.acos(); +} + +template +inline Quat Quat::acos() const +{ + Quat v(0, x, y, z); + T vNorm = v.norm(); + T k = vNorm < CV_QUAT_EPS ? 1 : vNorm; + return -v / k * acosh(); +} + +template +inline Quat atan(const Quat &q) +{ + return q.atan(); +} + +template +inline Quat Quat::atan() const +{ + Quat v(0, x, y, z); + T vNorm = v.norm(); + T k = vNorm < CV_QUAT_EPS ? 1 : vNorm; + return -v / k * (*this * v / k).atanh(); +} + +template +inline T Quat::getAngle(QuatAssumeType assumeUnit) const +{ + if (assumeUnit) + { + return 2 * std::acos(w); + } + if (norm() < CV_QUAT_EPS) + { + CV_Error(Error::StsBadArg, "This quaternion does not represent a rotation"); + } + return 2 * std::acos(w / norm()); +} + +template +inline Vec Quat::getAxis(QuatAssumeType assumeUnit) const +{ + T angle = getAngle(assumeUnit); + const T sin_v = std::sin(angle * 0.5); + if (assumeUnit) + { + return Vec{x, y, z} / sin_v; + } + return Vec {x, y, z} / (norm() * sin_v); +} + +template +Matx Quat::toRotMat4x4(QuatAssumeType assumeUnit) const +{ + T a = w, b = x, c = y, d = z; + if (!assumeUnit) + { + Quat qTemp = normalize(); + a = qTemp.w; + b = qTemp.x; + c = qTemp.y; + d = qTemp.z; + } + Matx R{ + 1 - 2 * (c * c + d * d), 2 * (b * c - a * d) , 2 * (b * d + a * c) , 0, + 2 * (b * c + a * d) , 1 - 2 * (b * b + d * d), 2 * (c * d - a * b) , 0, + 2 * (b * d - a * c) , 2 * (c * d + a * b) , 1 - 2 * (b * b + c * c), 0, + 0 , 0 , 0 , 1, + }; + return R; +} + +template +Matx Quat::toRotMat3x3(QuatAssumeType assumeUnit) const +{ + T a = w, b = x, c = y, d = z; + if (!assumeUnit) + { + Quat qTemp = normalize(); + a = qTemp.w; + b = qTemp.x; + c = qTemp.y; + d = qTemp.z; + } + Matx R{ + 1 - 2 * (c * c + d * d), 2 * (b * c - a * d) , 2 * (b * d + a * c), + 2 * (b * c + a * d) , 1 - 2 * (b * b + d * d), 2 * (c * d - a * b), + 2 * (b * d - a * c) , 2 * (c * d + a * b) , 1 - 2 * (b * b + c * c) + }; + return R; +} + +template +Vec Quat::toRotVec(QuatAssumeType assumeUnit) const +{ + T angle = getAngle(assumeUnit); + Vec axis = getAxis(assumeUnit); + return angle * axis; +} + +template +Vec Quat::toVec() const +{ + return Vec{w, x, y, z}; +} + +template +Quat Quat::lerp(const Quat &q0, const Quat &q1, const T t) +{ + return (1 - t) * q0 + t * q1; +} + +template +Quat Quat::slerp(const Quat &q0, const Quat &q1, const T t, QuatAssumeType assumeUnit, bool directChange) +{ + Quatd v0(q0); + Quatd v1(q1); + if (!assumeUnit) + { + v0 = v0.normalize(); + v1 = v1.normalize(); + } + T cosTheta = v0.dot(v1); + constexpr T DOT_THRESHOLD = 0.995; + if (cosTheta > DOT_THRESHOLD) + { + return nlerp(v0, v1, t, QUAT_ASSUME_UNIT); + } + + if (directChange && cosTheta < 0) + { + v0 = -v0; + cosTheta = -cosTheta; + } + T sinTheta = std::sqrt(1 - cosTheta * cosTheta); + T angle = atan2(sinTheta, cosTheta); + return (std::sin((1 - t) * angle) / (sinTheta) * v0 + std::sin(t * angle) / (sinTheta) * v1).normalize(); +} + + +template +inline Quat Quat::nlerp(const Quat &q0, const Quat &q1, const T t, QuatAssumeType assumeUnit) +{ + Quat v0(q0), v1(q1); + if (v1.dot(v0) < 0) + { + v0 = -v0; + } + if (assumeUnit) + { + return ((1 - t) * v0 + t * v1).normalize(); + } + v0 = v0.normalize(); + v1 = v1.normalize(); + return ((1 - t) * v0 + t * v1).normalize(); +} + + +template +inline bool Quat::isNormal(T eps) const +{ + + double normVar = norm(); + if ((normVar > 1 - eps) && (normVar < 1 + eps)) + return true; + return false; +} + +template +inline void Quat::assertNormal(T eps) const +{ + if (!isNormal(eps)) + CV_Error(Error::StsBadArg, "Quaternion should be normalized"); +} + + +template +inline Quat Quat::squad(const Quat &q0, const Quat &q1, + const Quat &q2, const Quat &q3, + const T t, QuatAssumeType assumeUnit, + bool directChange) +{ + Quat v0(q0), v1(q1), v2(q2), v3(q3); + if (!assumeUnit) + { + v0 = v0.normalize(); + v1 = v1.normalize(); + v2 = v2.normalize(); + v3 = v3.normalize(); + } + + Quat c0 = slerp(v0, v3, t, assumeUnit, directChange); + Quat c1 = slerp(v1, v2, t, assumeUnit, directChange); + return slerp(c0, c1, 2 * t * (1 - t), assumeUnit, directChange); +} + +template +Quat Quat::interPoint(const Quat &q0, const Quat &q1, + const Quat &q2, QuatAssumeType assumeUnit) +{ + Quat v0(q0), v1(q1), v2(q2); + if (!assumeUnit) + { + v0 = v0.normalize(); + v1 = v1.normalize(); + v2 = v2.normalize(); + } + return v1 * cv::exp(-(cv::log(v1.conjugate() * v0, assumeUnit) + (cv::log(v1.conjugate() * v2, assumeUnit))) / 4); +} + +template +Quat Quat::spline(const Quat &q0, const Quat &q1, const Quat &q2, const Quat &q3, const T t, QuatAssumeType assumeUnit) +{ + Quatd v0(q0), v1(q1), v2(q2), v3(q3); + if (!assumeUnit) + { + v0 = v0.normalize(); + v1 = v1.normalize(); + v2 = v2.normalize(); + v3 = v3.normalize(); + } + T cosTheta; + std::vector> vec{v0, v1, v2, v3}; + for (size_t i = 0; i < 3; ++i) + { + cosTheta = vec[i].dot(vec[i + 1]); + if (cosTheta < 0) + { + vec[i + 1] = -vec[i + 1]; + } + } + Quat s1 = interPoint(vec[0], vec[1], vec[2], QUAT_ASSUME_UNIT); + Quat s2 = interPoint(vec[1], vec[2], vec[3], QUAT_ASSUME_UNIT); + return squad(vec[1], s1, s2, vec[2], t, assumeUnit, QUAT_ASSUME_NOT_UNIT); +} + +} // namepsace +//! @endcond + +#endif /*OPENCV_CORE_QUATERNION_INL_HPP*/ diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/saturate.hpp b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/saturate.hpp new file mode 100644 index 0000000..8127e3d --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/saturate.hpp @@ -0,0 +1,179 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Copyright (C) 2013, OpenCV Foundation, all rights reserved. +// Copyright (C) 2014, Itseez Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef OPENCV_CORE_SATURATE_HPP +#define OPENCV_CORE_SATURATE_HPP + +#include "opencv2/core/cvdef.h" +#include "opencv2/core/fast_math.hpp" + +namespace cv +{ + +//! @addtogroup core_utils +//! @{ + +/////////////// saturate_cast (used in image & signal processing) /////////////////// + +/** @brief Template function for accurate conversion from one primitive type to another. + + The function saturate_cast resembles the standard C++ cast operations, such as static_cast\() + and others. It perform an efficient and accurate conversion from one primitive type to another + (see the introduction chapter). saturate in the name means that when the input value v is out of the + range of the target type, the result is not formed just by taking low bits of the input, but instead + the value is clipped. For example: + @code + uchar a = saturate_cast(-100); // a = 0 (UCHAR_MIN) + short b = saturate_cast(33333.33333); // b = 32767 (SHRT_MAX) + @endcode + Such clipping is done when the target type is unsigned char , signed char , unsigned short or + signed short . For 32-bit integers, no clipping is done. + + When the parameter is a floating-point value and the target type is an integer (8-, 16- or 32-bit), + the floating-point value is first rounded to the nearest integer and then clipped if needed (when + the target type is 8- or 16-bit). + + @param v Function parameter. + @sa add, subtract, multiply, divide, Mat::convertTo + */ +template static inline _Tp saturate_cast(uchar v) { return _Tp(v); } +/** @overload */ +template static inline _Tp saturate_cast(schar v) { return _Tp(v); } +/** @overload */ +template static inline _Tp saturate_cast(ushort v) { return _Tp(v); } +/** @overload */ +template static inline _Tp saturate_cast(short v) { return _Tp(v); } +/** @overload */ +template static inline _Tp saturate_cast(unsigned v) { return _Tp(v); } +/** @overload */ +template static inline _Tp saturate_cast(int v) { return _Tp(v); } +/** @overload */ +template static inline _Tp saturate_cast(float v) { return _Tp(v); } +/** @overload */ +template static inline _Tp saturate_cast(double v) { return _Tp(v); } +/** @overload */ +template static inline _Tp saturate_cast(int64 v) { return _Tp(v); } +/** @overload */ +template static inline _Tp saturate_cast(uint64 v) { return _Tp(v); } + +template<> inline uchar saturate_cast(schar v) { return (uchar)std::max((int)v, 0); } +template<> inline uchar saturate_cast(ushort v) { return (uchar)std::min((unsigned)v, (unsigned)UCHAR_MAX); } +template<> inline uchar saturate_cast(int v) { return (uchar)((unsigned)v <= UCHAR_MAX ? v : v > 0 ? UCHAR_MAX : 0); } +template<> inline uchar saturate_cast(short v) { return saturate_cast((int)v); } +template<> inline uchar saturate_cast(unsigned v) { return (uchar)std::min(v, (unsigned)UCHAR_MAX); } +template<> inline uchar saturate_cast(float v) { int iv = cvRound(v); return saturate_cast(iv); } +template<> inline uchar saturate_cast(double v) { int iv = cvRound(v); return saturate_cast(iv); } +template<> inline uchar saturate_cast(int64 v) { return (uchar)((uint64)v <= (uint64)UCHAR_MAX ? v : v > 0 ? UCHAR_MAX : 0); } +template<> inline uchar saturate_cast(uint64 v) { return (uchar)std::min(v, (uint64)UCHAR_MAX); } + +template<> inline schar saturate_cast(uchar v) { return (schar)std::min((int)v, SCHAR_MAX); } +template<> inline schar saturate_cast(ushort v) { return (schar)std::min((unsigned)v, (unsigned)SCHAR_MAX); } +template<> inline schar saturate_cast(int v) { return (schar)((unsigned)(v-SCHAR_MIN) <= (unsigned)UCHAR_MAX ? v : v > 0 ? SCHAR_MAX : SCHAR_MIN); } +template<> inline schar saturate_cast(short v) { return saturate_cast((int)v); } +template<> inline schar saturate_cast(unsigned v) { return (schar)std::min(v, (unsigned)SCHAR_MAX); } +template<> inline schar saturate_cast(float v) { int iv = cvRound(v); return saturate_cast(iv); } +template<> inline schar saturate_cast(double v) { int iv = cvRound(v); return saturate_cast(iv); } +template<> inline schar saturate_cast(int64 v) { return (schar)((uint64)((int64)v-SCHAR_MIN) <= (uint64)UCHAR_MAX ? v : v > 0 ? SCHAR_MAX : SCHAR_MIN); } +template<> inline schar saturate_cast(uint64 v) { return (schar)std::min(v, (uint64)SCHAR_MAX); } + +template<> inline ushort saturate_cast(schar v) { return (ushort)std::max((int)v, 0); } +template<> inline ushort saturate_cast(short v) { return (ushort)std::max((int)v, 0); } +template<> inline ushort saturate_cast(int v) { return (ushort)((unsigned)v <= (unsigned)USHRT_MAX ? v : v > 0 ? USHRT_MAX : 0); } +template<> inline ushort saturate_cast(unsigned v) { return (ushort)std::min(v, (unsigned)USHRT_MAX); } +template<> inline ushort saturate_cast(float v) { int iv = cvRound(v); return saturate_cast(iv); } +template<> inline ushort saturate_cast(double v) { int iv = cvRound(v); return saturate_cast(iv); } +template<> inline ushort saturate_cast(int64 v) { return (ushort)((uint64)v <= (uint64)USHRT_MAX ? v : v > 0 ? USHRT_MAX : 0); } +template<> inline ushort saturate_cast(uint64 v) { return (ushort)std::min(v, (uint64)USHRT_MAX); } + +template<> inline short saturate_cast(ushort v) { return (short)std::min((int)v, SHRT_MAX); } +template<> inline short saturate_cast(int v) { return (short)((unsigned)(v - SHRT_MIN) <= (unsigned)USHRT_MAX ? v : v > 0 ? SHRT_MAX : SHRT_MIN); } +template<> inline short saturate_cast(unsigned v) { return (short)std::min(v, (unsigned)SHRT_MAX); } +template<> inline short saturate_cast(float v) { int iv = cvRound(v); return saturate_cast(iv); } +template<> inline short saturate_cast(double v) { int iv = cvRound(v); return saturate_cast(iv); } +template<> inline short saturate_cast(int64 v) { return (short)((uint64)((int64)v - SHRT_MIN) <= (uint64)USHRT_MAX ? v : v > 0 ? SHRT_MAX : SHRT_MIN); } +template<> inline short saturate_cast(uint64 v) { return (short)std::min(v, (uint64)SHRT_MAX); } + +template<> inline int saturate_cast(unsigned v) { return (int)std::min(v, (unsigned)INT_MAX); } +template<> inline int saturate_cast(int64 v) { return (int)((uint64)(v - INT_MIN) <= (uint64)UINT_MAX ? v : v > 0 ? INT_MAX : INT_MIN); } +template<> inline int saturate_cast(uint64 v) { return (int)std::min(v, (uint64)INT_MAX); } +template<> inline int saturate_cast(float v) { return cvRound(v); } +template<> inline int saturate_cast(double v) { return cvRound(v); } + +template<> inline unsigned saturate_cast(schar v) { return (unsigned)std::max(v, (schar)0); } +template<> inline unsigned saturate_cast(short v) { return (unsigned)std::max(v, (short)0); } +template<> inline unsigned saturate_cast(int v) { return (unsigned)std::max(v, (int)0); } +template<> inline unsigned saturate_cast(int64 v) { return (unsigned)((uint64)v <= (uint64)UINT_MAX ? v : v > 0 ? UINT_MAX : 0); } +template<> inline unsigned saturate_cast(uint64 v) { return (unsigned)std::min(v, (uint64)UINT_MAX); } +// we intentionally do not clip negative numbers, to make -1 become 0xffffffff etc. +template<> inline unsigned saturate_cast(float v) { return static_cast(cvRound(v)); } +template<> inline unsigned saturate_cast(double v) { return static_cast(cvRound(v)); } + +template<> inline uint64 saturate_cast(schar v) { return (uint64)std::max(v, (schar)0); } +template<> inline uint64 saturate_cast(short v) { return (uint64)std::max(v, (short)0); } +template<> inline uint64 saturate_cast(int v) { return (uint64)std::max(v, (int)0); } +template<> inline uint64 saturate_cast(int64 v) { return (uint64)std::max(v, (int64)0); } + +template<> inline int64 saturate_cast(uint64 v) { return (int64)std::min(v, (uint64)LLONG_MAX); } + +/** @overload */ +template static inline _Tp saturate_cast(float16_t v) { return saturate_cast<_Tp>((float)v); } + +// in theory, we could use a LUT for 8u/8s->16f conversion, +// but with hardware support for FP32->FP16 conversion the current approach is preferable +template<> inline float16_t saturate_cast(uchar v) { return float16_t((float)v); } +template<> inline float16_t saturate_cast(schar v) { return float16_t((float)v); } +template<> inline float16_t saturate_cast(ushort v) { return float16_t((float)v); } +template<> inline float16_t saturate_cast(short v) { return float16_t((float)v); } +template<> inline float16_t saturate_cast(unsigned v){ return float16_t((float)v); } +template<> inline float16_t saturate_cast(int v) { return float16_t((float)v); } +template<> inline float16_t saturate_cast(uint64 v) { return float16_t((float)v); } +template<> inline float16_t saturate_cast(int64 v) { return float16_t((float)v); } +template<> inline float16_t saturate_cast(float v) { return float16_t(v); } +template<> inline float16_t saturate_cast(double v) { return float16_t((float)v); } + +//! @} + +} // cv + +#endif // OPENCV_CORE_SATURATE_HPP diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/simd_intrinsics.hpp b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/simd_intrinsics.hpp new file mode 100644 index 0000000..c50923f --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/simd_intrinsics.hpp @@ -0,0 +1,88 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#ifndef OPENCV_CORE_SIMD_INTRINSICS_HPP +#define OPENCV_CORE_SIMD_INTRINSICS_HPP + +/** +Helper header to support SIMD intrinsics (universal intrinsics) in user code. +Intrinsics documentation: https://docs.opencv.org/master/df/d91/group__core__hal__intrin.html + + +Checks of target CPU instruction set based on compiler definitions don't work well enough. +More reliable solutions require utilization of configuration systems (like CMake). + +So, probably you need to specify your own configuration. + +You can do that via CMake in this way: + add_definitions(/DOPENCV_SIMD_CONFIG_HEADER=opencv_simd_config_custom.hpp) +or + add_definitions(/DOPENCV_SIMD_CONFIG_INCLUDE_DIR=1) + +Additionally you may need to add include directory to your files: + include_directories("${CMAKE_CURRENT_LIST_DIR}/opencv_config_${MYTARGET}") + +These files can be pre-generated for target configurations of your application +or generated by CMake on the fly (use CMAKE_BINARY_DIR for that). + +Notes: +- H/W capability checks are still responsibility of your application +- runtime dispatching is not covered by this helper header +*/ + +#ifdef __OPENCV_BUILD +#error "Use core/hal/intrin.hpp during OpenCV build" +#endif + +#ifdef OPENCV_HAL_INTRIN_HPP +#error "core/simd_intrinsics.hpp must be included before core/hal/intrin.hpp" +#endif + +#include "opencv2/core/cvdef.h" +#include "opencv2/core/version.hpp" + +#ifdef OPENCV_SIMD_CONFIG_HEADER +#include CVAUX_STR(OPENCV_SIMD_CONFIG_HEADER) +#elif defined(OPENCV_SIMD_CONFIG_INCLUDE_DIR) +#include "opencv_simd_config.hpp" // corresponding directory should be added via -I compiler parameter +#else // custom config headers + +#if (!defined(CV_AVX_512F) || !CV_AVX_512F) && (defined(__AVX512__) || defined(__AVX512F__)) +# include +# undef CV_AVX_512F +# define CV_AVX_512F 1 +# ifndef OPENCV_SIMD_DONT_ASSUME_SKX // Skylake-X with AVX-512F/CD/BW/DQ/VL +# undef CV_AVX512_SKX +# define CV_AVX512_SKX 1 +# undef CV_AVX_512CD +# define CV_AVX_512CD 1 +# undef CV_AVX_512BW +# define CV_AVX_512BW 1 +# undef CV_AVX_512DQ +# define CV_AVX_512DQ 1 +# undef CV_AVX_512VL +# define CV_AVX_512VL 1 +# endif +#endif // AVX512 + +// GCC/Clang: -mavx2 +// MSVC: /arch:AVX2 +#if defined __AVX2__ +# include +# undef CV_AVX2 +# define CV_AVX2 1 +# if defined __F16C__ +# undef CV_FP16 +# define CV_FP16 1 +# endif +#endif + +#endif + +// SSE / NEON / VSX is handled by cv_cpu_dispatch.h compatibility block +#include "cv_cpu_dispatch.h" + +#include "hal/intrin.hpp" + +#endif // OPENCV_CORE_SIMD_INTRINSICS_HPP diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/softfloat.hpp b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/softfloat.hpp new file mode 100644 index 0000000..485e15c --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/softfloat.hpp @@ -0,0 +1,514 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html + +// This file is based on files from package issued with the following license: + +/*============================================================================ + +This C header file is part of the SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3c, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the +University of California. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=============================================================================*/ + +#pragma once +#ifndef softfloat_h +#define softfloat_h 1 + +#include "cvdef.h" + +namespace cv +{ + +/** @addtogroup core_utils_softfloat + + [SoftFloat](http://www.jhauser.us/arithmetic/SoftFloat.html) is a software implementation + of floating-point calculations according to IEEE 754 standard. + All calculations are done in integers, that's why they are machine-independent and bit-exact. + This library can be useful in accuracy-critical parts like look-up tables generation, tests, etc. + OpenCV contains a subset of SoftFloat partially rewritten to C++. + + ### Types + + There are two basic types: @ref softfloat and @ref softdouble. + These types are binary compatible with float and double types respectively + and support conversions to/from them. + Other types from original SoftFloat library like fp16 or fp128 were thrown away + as well as quiet/signaling NaN support, on-the-fly rounding mode switch + and exception flags (though exceptions can be implemented in the future). + + ### Operations + + Both types support the following: + - Construction from signed and unsigned 32-bit and 64 integers, + float/double or raw binary representation + - Conversions between each other, to float or double and to int + using @ref cvRound, @ref cvTrunc, @ref cvFloor, @ref cvCeil or a bunch of + saturate_cast functions + - Add, subtract, multiply, divide, remainder, square root, FMA with absolute precision + - Comparison operations + - Explicit sign, exponent and significand manipulation through get/set methods, + number state indicators (isInf, isNan, isSubnormal) + - Type-specific constants like eps, minimum/maximum value, best pi approximation, etc. + - min(), max(), abs(), exp(), log() and pow() functions + +*/ +//! @{ + +struct softfloat; +struct softdouble; + +struct CV_EXPORTS softfloat +{ +public: + /** @brief Default constructor */ + softfloat() { v = 0; } + /** @brief Copy constructor */ + softfloat( const softfloat& c) { v = c.v; } + /** @brief Assign constructor */ + softfloat& operator=( const softfloat& c ) + { + if(&c != this) v = c.v; + return *this; + } + /** @brief Construct from raw + + Builds new value from raw binary representation + */ + static const softfloat fromRaw( const uint32_t a ) { softfloat x; x.v = a; return x; } + + /** @brief Construct from integer */ + explicit softfloat( const uint32_t ); + explicit softfloat( const uint64_t ); + explicit softfloat( const int32_t ); + explicit softfloat( const int64_t ); + +#ifdef CV_INT32_T_IS_LONG_INT + // for platforms with int32_t = long int + explicit softfloat( const int a ) { *this = softfloat(static_cast(a)); } +#endif + + /** @brief Construct from float */ + explicit softfloat( const float a ) { Cv32suf s; s.f = a; v = s.u; } + + /** @brief Type casts */ + operator softdouble() const; + operator float() const { Cv32suf s; s.u = v; return s.f; } + + /** @brief Basic arithmetics */ + softfloat operator + (const softfloat&) const; + softfloat operator - (const softfloat&) const; + softfloat operator * (const softfloat&) const; + softfloat operator / (const softfloat&) const; + softfloat operator - () const { softfloat x; x.v = v ^ (1U << 31); return x; } + + /** @brief Remainder operator + + A quote from original SoftFloat manual: + + > The IEEE Standard remainder operation computes the value + > a - n * b, where n is the integer closest to a / b. + > If a / b is exactly halfway between two integers, n is the even integer + > closest to a / b. The IEEE Standard’s remainder operation is always exact and so requires no rounding. + > Depending on the relative magnitudes of the operands, the remainder functions + > can take considerably longer to execute than the other SoftFloat functions. + > This is an inherent characteristic of the remainder operation itself and is not a flaw + > in the SoftFloat implementation. + */ + softfloat operator % (const softfloat&) const; + + softfloat& operator += (const softfloat& a) { *this = *this + a; return *this; } + softfloat& operator -= (const softfloat& a) { *this = *this - a; return *this; } + softfloat& operator *= (const softfloat& a) { *this = *this * a; return *this; } + softfloat& operator /= (const softfloat& a) { *this = *this / a; return *this; } + softfloat& operator %= (const softfloat& a) { *this = *this % a; return *this; } + + /** @brief Comparison operations + + - Any operation with NaN produces false + + The only exception is when x is NaN: x != y for any y. + - Positive and negative zeros are equal + */ + bool operator == ( const softfloat& ) const; + bool operator != ( const softfloat& ) const; + bool operator > ( const softfloat& ) const; + bool operator >= ( const softfloat& ) const; + bool operator < ( const softfloat& ) const; + bool operator <= ( const softfloat& ) const; + + /** @brief NaN state indicator */ + inline bool isNaN() const { return (v & 0x7fffffff) > 0x7f800000; } + /** @brief Inf state indicator */ + inline bool isInf() const { return (v & 0x7fffffff) == 0x7f800000; } + /** @brief Subnormal number indicator */ + inline bool isSubnormal() const { return ((v >> 23) & 0xFF) == 0; } + + /** @brief Get sign bit */ + inline bool getSign() const { return (v >> 31) != 0; } + /** @brief Construct a copy with new sign bit */ + inline softfloat setSign(bool sign) const { softfloat x; x.v = (v & ((1U << 31) - 1)) | ((uint32_t)sign << 31); return x; } + /** @brief Get 0-based exponent */ + inline int getExp() const { return ((v >> 23) & 0xFF) - 127; } + /** @brief Construct a copy with new 0-based exponent */ + inline softfloat setExp(int e) const { softfloat x; x.v = (v & 0x807fffff) | (((e + 127) & 0xFF) << 23 ); return x; } + + /** @brief Get a fraction part + + Returns a number 1 <= x < 2 with the same significand + */ + inline softfloat getFrac() const + { + uint_fast32_t vv = (v & 0x007fffff) | (127 << 23); + return softfloat::fromRaw(vv); + } + /** @brief Construct a copy with provided significand + + Constructs a copy of a number with significand taken from parameter + */ + inline softfloat setFrac(const softfloat& s) const + { + softfloat x; + x.v = (v & 0xff800000) | (s.v & 0x007fffff); + return x; + } + + /** @brief Zero constant */ + static softfloat zero() { return softfloat::fromRaw( 0 ); } + /** @brief Positive infinity constant */ + static softfloat inf() { return softfloat::fromRaw( 0xFF << 23 ); } + /** @brief Default NaN constant */ + static softfloat nan() { return softfloat::fromRaw( 0x7fffffff ); } + /** @brief One constant */ + static softfloat one() { return softfloat::fromRaw( 127 << 23 ); } + /** @brief Smallest normalized value */ + static softfloat min() { return softfloat::fromRaw( 0x01 << 23 ); } + /** @brief Difference between 1 and next representable value */ + static softfloat eps() { return softfloat::fromRaw( (127 - 23) << 23 ); } + /** @brief Biggest finite value */ + static softfloat max() { return softfloat::fromRaw( (0xFF << 23) - 1 ); } + /** @brief Correct pi approximation */ + static softfloat pi() { return softfloat::fromRaw( 0x40490fdb ); } + + uint32_t v; +}; + +/*---------------------------------------------------------------------------- +*----------------------------------------------------------------------------*/ + +struct CV_EXPORTS softdouble +{ +public: + /** @brief Default constructor */ + softdouble() : v(0) { } + /** @brief Copy constructor */ + softdouble( const softdouble& c) { v = c.v; } + /** @brief Assign constructor */ + softdouble& operator=( const softdouble& c ) + { + if(&c != this) v = c.v; + return *this; + } + /** @brief Construct from raw + + Builds new value from raw binary representation + */ + static softdouble fromRaw( const uint64_t a ) { softdouble x; x.v = a; return x; } + + /** @brief Construct from integer */ + explicit softdouble( const uint32_t ); + explicit softdouble( const uint64_t ); + explicit softdouble( const int32_t ); + explicit softdouble( const int64_t ); + +#ifdef CV_INT32_T_IS_LONG_INT + // for platforms with int32_t = long int + explicit softdouble( const int a ) { *this = softdouble(static_cast(a)); } +#endif + + /** @brief Construct from double */ + explicit softdouble( const double a ) { Cv64suf s; s.f = a; v = s.u; } + + /** @brief Type casts */ + operator softfloat() const; + operator double() const { Cv64suf s; s.u = v; return s.f; } + + /** @brief Basic arithmetics */ + softdouble operator + (const softdouble&) const; + softdouble operator - (const softdouble&) const; + softdouble operator * (const softdouble&) const; + softdouble operator / (const softdouble&) const; + softdouble operator - () const { softdouble x; x.v = v ^ (1ULL << 63); return x; } + + /** @brief Remainder operator + + A quote from original SoftFloat manual: + + > The IEEE Standard remainder operation computes the value + > a - n * b, where n is the integer closest to a / b. + > If a / b is exactly halfway between two integers, n is the even integer + > closest to a / b. The IEEE Standard’s remainder operation is always exact and so requires no rounding. + > Depending on the relative magnitudes of the operands, the remainder functions + > can take considerably longer to execute than the other SoftFloat functions. + > This is an inherent characteristic of the remainder operation itself and is not a flaw + > in the SoftFloat implementation. + */ + softdouble operator % (const softdouble&) const; + + softdouble& operator += (const softdouble& a) { *this = *this + a; return *this; } + softdouble& operator -= (const softdouble& a) { *this = *this - a; return *this; } + softdouble& operator *= (const softdouble& a) { *this = *this * a; return *this; } + softdouble& operator /= (const softdouble& a) { *this = *this / a; return *this; } + softdouble& operator %= (const softdouble& a) { *this = *this % a; return *this; } + + /** @brief Comparison operations + + - Any operation with NaN produces false + + The only exception is when x is NaN: x != y for any y. + - Positive and negative zeros are equal + */ + bool operator == ( const softdouble& ) const; + bool operator != ( const softdouble& ) const; + bool operator > ( const softdouble& ) const; + bool operator >= ( const softdouble& ) const; + bool operator < ( const softdouble& ) const; + bool operator <= ( const softdouble& ) const; + + /** @brief NaN state indicator */ + inline bool isNaN() const { return (v & 0x7fffffffffffffff) > 0x7ff0000000000000; } + /** @brief Inf state indicator */ + inline bool isInf() const { return (v & 0x7fffffffffffffff) == 0x7ff0000000000000; } + /** @brief Subnormal number indicator */ + inline bool isSubnormal() const { return ((v >> 52) & 0x7FF) == 0; } + + /** @brief Get sign bit */ + inline bool getSign() const { return (v >> 63) != 0; } + /** @brief Construct a copy with new sign bit */ + softdouble setSign(bool sign) const { softdouble x; x.v = (v & ((1ULL << 63) - 1)) | ((uint_fast64_t)(sign) << 63); return x; } + /** @brief Get 0-based exponent */ + inline int getExp() const { return ((v >> 52) & 0x7FF) - 1023; } + /** @brief Construct a copy with new 0-based exponent */ + inline softdouble setExp(int e) const + { + softdouble x; + x.v = (v & 0x800FFFFFFFFFFFFF) | ((uint_fast64_t)((e + 1023) & 0x7FF) << 52); + return x; + } + + /** @brief Get a fraction part + + Returns a number 1 <= x < 2 with the same significand + */ + inline softdouble getFrac() const + { + uint_fast64_t vv = (v & 0x000FFFFFFFFFFFFF) | ((uint_fast64_t)(1023) << 52); + return softdouble::fromRaw(vv); + } + /** @brief Construct a copy with provided significand + + Constructs a copy of a number with significand taken from parameter + */ + inline softdouble setFrac(const softdouble& s) const + { + softdouble x; + x.v = (v & 0xFFF0000000000000) | (s.v & 0x000FFFFFFFFFFFFF); + return x; + } + + /** @brief Zero constant */ + static softdouble zero() { return softdouble::fromRaw( 0 ); } + /** @brief Positive infinity constant */ + static softdouble inf() { return softdouble::fromRaw( (uint_fast64_t)(0x7FF) << 52 ); } + /** @brief Default NaN constant */ + static softdouble nan() { return softdouble::fromRaw( CV_BIG_INT(0x7FFFFFFFFFFFFFFF) ); } + /** @brief One constant */ + static softdouble one() { return softdouble::fromRaw( (uint_fast64_t)( 1023) << 52 ); } + /** @brief Smallest normalized value */ + static softdouble min() { return softdouble::fromRaw( (uint_fast64_t)( 0x01) << 52 ); } + /** @brief Difference between 1 and next representable value */ + static softdouble eps() { return softdouble::fromRaw( (uint_fast64_t)( 1023 - 52 ) << 52 ); } + /** @brief Biggest finite value */ + static softdouble max() { return softdouble::fromRaw( ((uint_fast64_t)(0x7FF) << 52) - 1 ); } + /** @brief Correct pi approximation */ + static softdouble pi() { return softdouble::fromRaw( CV_BIG_INT(0x400921FB54442D18) ); } + + uint64_t v; +}; + +/*---------------------------------------------------------------------------- +*----------------------------------------------------------------------------*/ + +/** @brief Fused Multiplication and Addition + +Computes (a*b)+c with single rounding +*/ +CV_EXPORTS softfloat mulAdd( const softfloat& a, const softfloat& b, const softfloat & c); +CV_EXPORTS softdouble mulAdd( const softdouble& a, const softdouble& b, const softdouble& c); + +/** @brief Square root */ +CV_EXPORTS softfloat sqrt( const softfloat& a ); +CV_EXPORTS softdouble sqrt( const softdouble& a ); +} + +/*---------------------------------------------------------------------------- +| Ported from OpenCV and added for usability +*----------------------------------------------------------------------------*/ + +/** @brief Truncates number to integer with minimum magnitude */ +CV_EXPORTS int cvTrunc(const cv::softfloat& a); +CV_EXPORTS int cvTrunc(const cv::softdouble& a); + +/** @brief Rounds a number to nearest even integer */ +CV_EXPORTS int cvRound(const cv::softfloat& a); +CV_EXPORTS int cvRound(const cv::softdouble& a); + +/** @brief Rounds a number to nearest even long long integer */ +CV_EXPORTS int64_t cvRound64(const cv::softdouble& a); + +/** @brief Rounds a number down to integer */ +CV_EXPORTS int cvFloor(const cv::softfloat& a); +CV_EXPORTS int cvFloor(const cv::softdouble& a); + +/** @brief Rounds number up to integer */ +CV_EXPORTS int cvCeil(const cv::softfloat& a); +CV_EXPORTS int cvCeil(const cv::softdouble& a); + +namespace cv +{ +/** @brief Saturate casts */ +template static inline _Tp saturate_cast(softfloat a) { return _Tp(a); } +template static inline _Tp saturate_cast(softdouble a) { return _Tp(a); } + +template<> inline uchar saturate_cast(softfloat a) { return (uchar)std::max(std::min(cvRound(a), (int)UCHAR_MAX), 0); } +template<> inline uchar saturate_cast(softdouble a) { return (uchar)std::max(std::min(cvRound(a), (int)UCHAR_MAX), 0); } + +template<> inline schar saturate_cast(softfloat a) { return (schar)std::min(std::max(cvRound(a), (int)SCHAR_MIN), (int)SCHAR_MAX); } +template<> inline schar saturate_cast(softdouble a) { return (schar)std::min(std::max(cvRound(a), (int)SCHAR_MIN), (int)SCHAR_MAX); } + +template<> inline ushort saturate_cast(softfloat a) { return (ushort)std::max(std::min(cvRound(a), (int)USHRT_MAX), 0); } +template<> inline ushort saturate_cast(softdouble a) { return (ushort)std::max(std::min(cvRound(a), (int)USHRT_MAX), 0); } + +template<> inline short saturate_cast(softfloat a) { return (short)std::min(std::max(cvRound(a), (int)SHRT_MIN), (int)SHRT_MAX); } +template<> inline short saturate_cast(softdouble a) { return (short)std::min(std::max(cvRound(a), (int)SHRT_MIN), (int)SHRT_MAX); } + +template<> inline int saturate_cast(softfloat a) { return cvRound(a); } +template<> inline int saturate_cast(softdouble a) { return cvRound(a); } + +template<> inline int64_t saturate_cast(softfloat a) { return cvRound(a); } +template<> inline int64_t saturate_cast(softdouble a) { return cvRound64(a); } + +/** @brief Saturate cast to unsigned integer and unsigned long long integer +We intentionally do not clip negative numbers, to make -1 become 0xffffffff etc. +*/ +template<> inline unsigned saturate_cast(softfloat a) { return cvRound(a); } +template<> inline unsigned saturate_cast(softdouble a) { return cvRound(a); } + +template<> inline uint64_t saturate_cast(softfloat a) { return cvRound(a); } +template<> inline uint64_t saturate_cast(softdouble a) { return cvRound64(a); } + +/** @brief Min and Max functions */ +inline softfloat min(const softfloat& a, const softfloat& b) { return (a > b) ? b : a; } +inline softdouble min(const softdouble& a, const softdouble& b) { return (a > b) ? b : a; } + +inline softfloat max(const softfloat& a, const softfloat& b) { return (a > b) ? a : b; } +inline softdouble max(const softdouble& a, const softdouble& b) { return (a > b) ? a : b; } + +/** @brief Absolute value */ +inline softfloat abs( softfloat a) { softfloat x; x.v = a.v & ((1U << 31) - 1); return x; } +inline softdouble abs( softdouble a) { softdouble x; x.v = a.v & ((1ULL << 63) - 1); return x; } + +/** @brief Exponent + +Special cases: +- exp(NaN) is NaN +- exp(-Inf) == 0 +- exp(+Inf) == +Inf +*/ +CV_EXPORTS softfloat exp( const softfloat& a); +CV_EXPORTS softdouble exp( const softdouble& a); + +/** @brief Natural logarithm + +Special cases: +- log(NaN), log(x < 0) are NaN +- log(0) == -Inf +*/ +CV_EXPORTS softfloat log( const softfloat& a ); +CV_EXPORTS softdouble log( const softdouble& a ); + +/** @brief Raising to the power + +Special cases: +- x**NaN is NaN for any x +- ( |x| == 1 )**Inf is NaN +- ( |x| > 1 )**+Inf or ( |x| < 1 )**-Inf is +Inf +- ( |x| > 1 )**-Inf or ( |x| < 1 )**+Inf is 0 +- x ** 0 == 1 for any x +- x ** 1 == 1 for any x +- NaN ** y is NaN for any other y +- Inf**(y < 0) == 0 +- Inf ** y is +Inf for any other y +- (x < 0)**y is NaN for any other y if x can't be correctly rounded to integer +- 0 ** 0 == 1 +- 0 ** (y < 0) is +Inf +- 0 ** (y > 0) is 0 +*/ +CV_EXPORTS softfloat pow( const softfloat& a, const softfloat& b); +CV_EXPORTS softdouble pow( const softdouble& a, const softdouble& b); + +/** @brief Cube root + +Special cases: +- cbrt(NaN) is NaN +- cbrt(+/-Inf) is +/-Inf +*/ +CV_EXPORTS softfloat cbrt( const softfloat& a ); + +/** @brief Sine + +Special cases: +- sin(Inf) or sin(NaN) is NaN +- sin(x) == x when sin(x) is close to zero +*/ +CV_EXPORTS softdouble sin( const softdouble& a ); + +/** @brief Cosine + * +Special cases: +- cos(Inf) or cos(NaN) is NaN +- cos(x) == +/- 1 when cos(x) is close to +/- 1 +*/ +CV_EXPORTS softdouble cos( const softdouble& a ); + +//! @} core_utils_softfloat + +} // cv:: + +#endif diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/sse_utils.hpp b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/sse_utils.hpp new file mode 100644 index 0000000..0906583 --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/sse_utils.hpp @@ -0,0 +1,652 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2015, Itseez Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef OPENCV_CORE_SSE_UTILS_HPP +#define OPENCV_CORE_SSE_UTILS_HPP + +#ifndef __cplusplus +# error sse_utils.hpp header must be compiled as C++ +#endif + +#include "opencv2/core/cvdef.h" + +//! @addtogroup core_utils_sse +//! @{ + +#if CV_SSE2 + +inline void _mm_deinterleave_epi8(__m128i & v_r0, __m128i & v_r1, __m128i & v_g0, __m128i & v_g1) +{ + __m128i layer1_chunk0 = _mm_unpacklo_epi8(v_r0, v_g0); + __m128i layer1_chunk1 = _mm_unpackhi_epi8(v_r0, v_g0); + __m128i layer1_chunk2 = _mm_unpacklo_epi8(v_r1, v_g1); + __m128i layer1_chunk3 = _mm_unpackhi_epi8(v_r1, v_g1); + + __m128i layer2_chunk0 = _mm_unpacklo_epi8(layer1_chunk0, layer1_chunk2); + __m128i layer2_chunk1 = _mm_unpackhi_epi8(layer1_chunk0, layer1_chunk2); + __m128i layer2_chunk2 = _mm_unpacklo_epi8(layer1_chunk1, layer1_chunk3); + __m128i layer2_chunk3 = _mm_unpackhi_epi8(layer1_chunk1, layer1_chunk3); + + __m128i layer3_chunk0 = _mm_unpacklo_epi8(layer2_chunk0, layer2_chunk2); + __m128i layer3_chunk1 = _mm_unpackhi_epi8(layer2_chunk0, layer2_chunk2); + __m128i layer3_chunk2 = _mm_unpacklo_epi8(layer2_chunk1, layer2_chunk3); + __m128i layer3_chunk3 = _mm_unpackhi_epi8(layer2_chunk1, layer2_chunk3); + + __m128i layer4_chunk0 = _mm_unpacklo_epi8(layer3_chunk0, layer3_chunk2); + __m128i layer4_chunk1 = _mm_unpackhi_epi8(layer3_chunk0, layer3_chunk2); + __m128i layer4_chunk2 = _mm_unpacklo_epi8(layer3_chunk1, layer3_chunk3); + __m128i layer4_chunk3 = _mm_unpackhi_epi8(layer3_chunk1, layer3_chunk3); + + v_r0 = _mm_unpacklo_epi8(layer4_chunk0, layer4_chunk2); + v_r1 = _mm_unpackhi_epi8(layer4_chunk0, layer4_chunk2); + v_g0 = _mm_unpacklo_epi8(layer4_chunk1, layer4_chunk3); + v_g1 = _mm_unpackhi_epi8(layer4_chunk1, layer4_chunk3); +} + +inline void _mm_deinterleave_epi8(__m128i & v_r0, __m128i & v_r1, __m128i & v_g0, + __m128i & v_g1, __m128i & v_b0, __m128i & v_b1) +{ + __m128i layer1_chunk0 = _mm_unpacklo_epi8(v_r0, v_g1); + __m128i layer1_chunk1 = _mm_unpackhi_epi8(v_r0, v_g1); + __m128i layer1_chunk2 = _mm_unpacklo_epi8(v_r1, v_b0); + __m128i layer1_chunk3 = _mm_unpackhi_epi8(v_r1, v_b0); + __m128i layer1_chunk4 = _mm_unpacklo_epi8(v_g0, v_b1); + __m128i layer1_chunk5 = _mm_unpackhi_epi8(v_g0, v_b1); + + __m128i layer2_chunk0 = _mm_unpacklo_epi8(layer1_chunk0, layer1_chunk3); + __m128i layer2_chunk1 = _mm_unpackhi_epi8(layer1_chunk0, layer1_chunk3); + __m128i layer2_chunk2 = _mm_unpacklo_epi8(layer1_chunk1, layer1_chunk4); + __m128i layer2_chunk3 = _mm_unpackhi_epi8(layer1_chunk1, layer1_chunk4); + __m128i layer2_chunk4 = _mm_unpacklo_epi8(layer1_chunk2, layer1_chunk5); + __m128i layer2_chunk5 = _mm_unpackhi_epi8(layer1_chunk2, layer1_chunk5); + + __m128i layer3_chunk0 = _mm_unpacklo_epi8(layer2_chunk0, layer2_chunk3); + __m128i layer3_chunk1 = _mm_unpackhi_epi8(layer2_chunk0, layer2_chunk3); + __m128i layer3_chunk2 = _mm_unpacklo_epi8(layer2_chunk1, layer2_chunk4); + __m128i layer3_chunk3 = _mm_unpackhi_epi8(layer2_chunk1, layer2_chunk4); + __m128i layer3_chunk4 = _mm_unpacklo_epi8(layer2_chunk2, layer2_chunk5); + __m128i layer3_chunk5 = _mm_unpackhi_epi8(layer2_chunk2, layer2_chunk5); + + __m128i layer4_chunk0 = _mm_unpacklo_epi8(layer3_chunk0, layer3_chunk3); + __m128i layer4_chunk1 = _mm_unpackhi_epi8(layer3_chunk0, layer3_chunk3); + __m128i layer4_chunk2 = _mm_unpacklo_epi8(layer3_chunk1, layer3_chunk4); + __m128i layer4_chunk3 = _mm_unpackhi_epi8(layer3_chunk1, layer3_chunk4); + __m128i layer4_chunk4 = _mm_unpacklo_epi8(layer3_chunk2, layer3_chunk5); + __m128i layer4_chunk5 = _mm_unpackhi_epi8(layer3_chunk2, layer3_chunk5); + + v_r0 = _mm_unpacklo_epi8(layer4_chunk0, layer4_chunk3); + v_r1 = _mm_unpackhi_epi8(layer4_chunk0, layer4_chunk3); + v_g0 = _mm_unpacklo_epi8(layer4_chunk1, layer4_chunk4); + v_g1 = _mm_unpackhi_epi8(layer4_chunk1, layer4_chunk4); + v_b0 = _mm_unpacklo_epi8(layer4_chunk2, layer4_chunk5); + v_b1 = _mm_unpackhi_epi8(layer4_chunk2, layer4_chunk5); +} + +inline void _mm_deinterleave_epi8(__m128i & v_r0, __m128i & v_r1, __m128i & v_g0, __m128i & v_g1, + __m128i & v_b0, __m128i & v_b1, __m128i & v_a0, __m128i & v_a1) +{ + __m128i layer1_chunk0 = _mm_unpacklo_epi8(v_r0, v_b0); + __m128i layer1_chunk1 = _mm_unpackhi_epi8(v_r0, v_b0); + __m128i layer1_chunk2 = _mm_unpacklo_epi8(v_r1, v_b1); + __m128i layer1_chunk3 = _mm_unpackhi_epi8(v_r1, v_b1); + __m128i layer1_chunk4 = _mm_unpacklo_epi8(v_g0, v_a0); + __m128i layer1_chunk5 = _mm_unpackhi_epi8(v_g0, v_a0); + __m128i layer1_chunk6 = _mm_unpacklo_epi8(v_g1, v_a1); + __m128i layer1_chunk7 = _mm_unpackhi_epi8(v_g1, v_a1); + + __m128i layer2_chunk0 = _mm_unpacklo_epi8(layer1_chunk0, layer1_chunk4); + __m128i layer2_chunk1 = _mm_unpackhi_epi8(layer1_chunk0, layer1_chunk4); + __m128i layer2_chunk2 = _mm_unpacklo_epi8(layer1_chunk1, layer1_chunk5); + __m128i layer2_chunk3 = _mm_unpackhi_epi8(layer1_chunk1, layer1_chunk5); + __m128i layer2_chunk4 = _mm_unpacklo_epi8(layer1_chunk2, layer1_chunk6); + __m128i layer2_chunk5 = _mm_unpackhi_epi8(layer1_chunk2, layer1_chunk6); + __m128i layer2_chunk6 = _mm_unpacklo_epi8(layer1_chunk3, layer1_chunk7); + __m128i layer2_chunk7 = _mm_unpackhi_epi8(layer1_chunk3, layer1_chunk7); + + __m128i layer3_chunk0 = _mm_unpacklo_epi8(layer2_chunk0, layer2_chunk4); + __m128i layer3_chunk1 = _mm_unpackhi_epi8(layer2_chunk0, layer2_chunk4); + __m128i layer3_chunk2 = _mm_unpacklo_epi8(layer2_chunk1, layer2_chunk5); + __m128i layer3_chunk3 = _mm_unpackhi_epi8(layer2_chunk1, layer2_chunk5); + __m128i layer3_chunk4 = _mm_unpacklo_epi8(layer2_chunk2, layer2_chunk6); + __m128i layer3_chunk5 = _mm_unpackhi_epi8(layer2_chunk2, layer2_chunk6); + __m128i layer3_chunk6 = _mm_unpacklo_epi8(layer2_chunk3, layer2_chunk7); + __m128i layer3_chunk7 = _mm_unpackhi_epi8(layer2_chunk3, layer2_chunk7); + + __m128i layer4_chunk0 = _mm_unpacklo_epi8(layer3_chunk0, layer3_chunk4); + __m128i layer4_chunk1 = _mm_unpackhi_epi8(layer3_chunk0, layer3_chunk4); + __m128i layer4_chunk2 = _mm_unpacklo_epi8(layer3_chunk1, layer3_chunk5); + __m128i layer4_chunk3 = _mm_unpackhi_epi8(layer3_chunk1, layer3_chunk5); + __m128i layer4_chunk4 = _mm_unpacklo_epi8(layer3_chunk2, layer3_chunk6); + __m128i layer4_chunk5 = _mm_unpackhi_epi8(layer3_chunk2, layer3_chunk6); + __m128i layer4_chunk6 = _mm_unpacklo_epi8(layer3_chunk3, layer3_chunk7); + __m128i layer4_chunk7 = _mm_unpackhi_epi8(layer3_chunk3, layer3_chunk7); + + v_r0 = _mm_unpacklo_epi8(layer4_chunk0, layer4_chunk4); + v_r1 = _mm_unpackhi_epi8(layer4_chunk0, layer4_chunk4); + v_g0 = _mm_unpacklo_epi8(layer4_chunk1, layer4_chunk5); + v_g1 = _mm_unpackhi_epi8(layer4_chunk1, layer4_chunk5); + v_b0 = _mm_unpacklo_epi8(layer4_chunk2, layer4_chunk6); + v_b1 = _mm_unpackhi_epi8(layer4_chunk2, layer4_chunk6); + v_a0 = _mm_unpacklo_epi8(layer4_chunk3, layer4_chunk7); + v_a1 = _mm_unpackhi_epi8(layer4_chunk3, layer4_chunk7); +} + +inline void _mm_interleave_epi8(__m128i & v_r0, __m128i & v_r1, __m128i & v_g0, __m128i & v_g1) +{ + __m128i v_mask = _mm_set1_epi16(0x00ff); + + __m128i layer4_chunk0 = _mm_packus_epi16(_mm_and_si128(v_r0, v_mask), _mm_and_si128(v_r1, v_mask)); + __m128i layer4_chunk2 = _mm_packus_epi16(_mm_srli_epi16(v_r0, 8), _mm_srli_epi16(v_r1, 8)); + __m128i layer4_chunk1 = _mm_packus_epi16(_mm_and_si128(v_g0, v_mask), _mm_and_si128(v_g1, v_mask)); + __m128i layer4_chunk3 = _mm_packus_epi16(_mm_srli_epi16(v_g0, 8), _mm_srli_epi16(v_g1, 8)); + + __m128i layer3_chunk0 = _mm_packus_epi16(_mm_and_si128(layer4_chunk0, v_mask), _mm_and_si128(layer4_chunk1, v_mask)); + __m128i layer3_chunk2 = _mm_packus_epi16(_mm_srli_epi16(layer4_chunk0, 8), _mm_srli_epi16(layer4_chunk1, 8)); + __m128i layer3_chunk1 = _mm_packus_epi16(_mm_and_si128(layer4_chunk2, v_mask), _mm_and_si128(layer4_chunk3, v_mask)); + __m128i layer3_chunk3 = _mm_packus_epi16(_mm_srli_epi16(layer4_chunk2, 8), _mm_srli_epi16(layer4_chunk3, 8)); + + __m128i layer2_chunk0 = _mm_packus_epi16(_mm_and_si128(layer3_chunk0, v_mask), _mm_and_si128(layer3_chunk1, v_mask)); + __m128i layer2_chunk2 = _mm_packus_epi16(_mm_srli_epi16(layer3_chunk0, 8), _mm_srli_epi16(layer3_chunk1, 8)); + __m128i layer2_chunk1 = _mm_packus_epi16(_mm_and_si128(layer3_chunk2, v_mask), _mm_and_si128(layer3_chunk3, v_mask)); + __m128i layer2_chunk3 = _mm_packus_epi16(_mm_srli_epi16(layer3_chunk2, 8), _mm_srli_epi16(layer3_chunk3, 8)); + + __m128i layer1_chunk0 = _mm_packus_epi16(_mm_and_si128(layer2_chunk0, v_mask), _mm_and_si128(layer2_chunk1, v_mask)); + __m128i layer1_chunk2 = _mm_packus_epi16(_mm_srli_epi16(layer2_chunk0, 8), _mm_srli_epi16(layer2_chunk1, 8)); + __m128i layer1_chunk1 = _mm_packus_epi16(_mm_and_si128(layer2_chunk2, v_mask), _mm_and_si128(layer2_chunk3, v_mask)); + __m128i layer1_chunk3 = _mm_packus_epi16(_mm_srli_epi16(layer2_chunk2, 8), _mm_srli_epi16(layer2_chunk3, 8)); + + v_r0 = _mm_packus_epi16(_mm_and_si128(layer1_chunk0, v_mask), _mm_and_si128(layer1_chunk1, v_mask)); + v_g0 = _mm_packus_epi16(_mm_srli_epi16(layer1_chunk0, 8), _mm_srli_epi16(layer1_chunk1, 8)); + v_r1 = _mm_packus_epi16(_mm_and_si128(layer1_chunk2, v_mask), _mm_and_si128(layer1_chunk3, v_mask)); + v_g1 = _mm_packus_epi16(_mm_srli_epi16(layer1_chunk2, 8), _mm_srli_epi16(layer1_chunk3, 8)); +} + +inline void _mm_interleave_epi8(__m128i & v_r0, __m128i & v_r1, __m128i & v_g0, + __m128i & v_g1, __m128i & v_b0, __m128i & v_b1) +{ + __m128i v_mask = _mm_set1_epi16(0x00ff); + + __m128i layer4_chunk0 = _mm_packus_epi16(_mm_and_si128(v_r0, v_mask), _mm_and_si128(v_r1, v_mask)); + __m128i layer4_chunk3 = _mm_packus_epi16(_mm_srli_epi16(v_r0, 8), _mm_srli_epi16(v_r1, 8)); + __m128i layer4_chunk1 = _mm_packus_epi16(_mm_and_si128(v_g0, v_mask), _mm_and_si128(v_g1, v_mask)); + __m128i layer4_chunk4 = _mm_packus_epi16(_mm_srli_epi16(v_g0, 8), _mm_srli_epi16(v_g1, 8)); + __m128i layer4_chunk2 = _mm_packus_epi16(_mm_and_si128(v_b0, v_mask), _mm_and_si128(v_b1, v_mask)); + __m128i layer4_chunk5 = _mm_packus_epi16(_mm_srli_epi16(v_b0, 8), _mm_srli_epi16(v_b1, 8)); + + __m128i layer3_chunk0 = _mm_packus_epi16(_mm_and_si128(layer4_chunk0, v_mask), _mm_and_si128(layer4_chunk1, v_mask)); + __m128i layer3_chunk3 = _mm_packus_epi16(_mm_srli_epi16(layer4_chunk0, 8), _mm_srli_epi16(layer4_chunk1, 8)); + __m128i layer3_chunk1 = _mm_packus_epi16(_mm_and_si128(layer4_chunk2, v_mask), _mm_and_si128(layer4_chunk3, v_mask)); + __m128i layer3_chunk4 = _mm_packus_epi16(_mm_srli_epi16(layer4_chunk2, 8), _mm_srli_epi16(layer4_chunk3, 8)); + __m128i layer3_chunk2 = _mm_packus_epi16(_mm_and_si128(layer4_chunk4, v_mask), _mm_and_si128(layer4_chunk5, v_mask)); + __m128i layer3_chunk5 = _mm_packus_epi16(_mm_srli_epi16(layer4_chunk4, 8), _mm_srli_epi16(layer4_chunk5, 8)); + + __m128i layer2_chunk0 = _mm_packus_epi16(_mm_and_si128(layer3_chunk0, v_mask), _mm_and_si128(layer3_chunk1, v_mask)); + __m128i layer2_chunk3 = _mm_packus_epi16(_mm_srli_epi16(layer3_chunk0, 8), _mm_srli_epi16(layer3_chunk1, 8)); + __m128i layer2_chunk1 = _mm_packus_epi16(_mm_and_si128(layer3_chunk2, v_mask), _mm_and_si128(layer3_chunk3, v_mask)); + __m128i layer2_chunk4 = _mm_packus_epi16(_mm_srli_epi16(layer3_chunk2, 8), _mm_srli_epi16(layer3_chunk3, 8)); + __m128i layer2_chunk2 = _mm_packus_epi16(_mm_and_si128(layer3_chunk4, v_mask), _mm_and_si128(layer3_chunk5, v_mask)); + __m128i layer2_chunk5 = _mm_packus_epi16(_mm_srli_epi16(layer3_chunk4, 8), _mm_srli_epi16(layer3_chunk5, 8)); + + __m128i layer1_chunk0 = _mm_packus_epi16(_mm_and_si128(layer2_chunk0, v_mask), _mm_and_si128(layer2_chunk1, v_mask)); + __m128i layer1_chunk3 = _mm_packus_epi16(_mm_srli_epi16(layer2_chunk0, 8), _mm_srli_epi16(layer2_chunk1, 8)); + __m128i layer1_chunk1 = _mm_packus_epi16(_mm_and_si128(layer2_chunk2, v_mask), _mm_and_si128(layer2_chunk3, v_mask)); + __m128i layer1_chunk4 = _mm_packus_epi16(_mm_srli_epi16(layer2_chunk2, 8), _mm_srli_epi16(layer2_chunk3, 8)); + __m128i layer1_chunk2 = _mm_packus_epi16(_mm_and_si128(layer2_chunk4, v_mask), _mm_and_si128(layer2_chunk5, v_mask)); + __m128i layer1_chunk5 = _mm_packus_epi16(_mm_srli_epi16(layer2_chunk4, 8), _mm_srli_epi16(layer2_chunk5, 8)); + + v_r0 = _mm_packus_epi16(_mm_and_si128(layer1_chunk0, v_mask), _mm_and_si128(layer1_chunk1, v_mask)); + v_g1 = _mm_packus_epi16(_mm_srli_epi16(layer1_chunk0, 8), _mm_srli_epi16(layer1_chunk1, 8)); + v_r1 = _mm_packus_epi16(_mm_and_si128(layer1_chunk2, v_mask), _mm_and_si128(layer1_chunk3, v_mask)); + v_b0 = _mm_packus_epi16(_mm_srli_epi16(layer1_chunk2, 8), _mm_srli_epi16(layer1_chunk3, 8)); + v_g0 = _mm_packus_epi16(_mm_and_si128(layer1_chunk4, v_mask), _mm_and_si128(layer1_chunk5, v_mask)); + v_b1 = _mm_packus_epi16(_mm_srli_epi16(layer1_chunk4, 8), _mm_srli_epi16(layer1_chunk5, 8)); +} + +inline void _mm_interleave_epi8(__m128i & v_r0, __m128i & v_r1, __m128i & v_g0, __m128i & v_g1, + __m128i & v_b0, __m128i & v_b1, __m128i & v_a0, __m128i & v_a1) +{ + __m128i v_mask = _mm_set1_epi16(0x00ff); + + __m128i layer4_chunk0 = _mm_packus_epi16(_mm_and_si128(v_r0, v_mask), _mm_and_si128(v_r1, v_mask)); + __m128i layer4_chunk4 = _mm_packus_epi16(_mm_srli_epi16(v_r0, 8), _mm_srli_epi16(v_r1, 8)); + __m128i layer4_chunk1 = _mm_packus_epi16(_mm_and_si128(v_g0, v_mask), _mm_and_si128(v_g1, v_mask)); + __m128i layer4_chunk5 = _mm_packus_epi16(_mm_srli_epi16(v_g0, 8), _mm_srli_epi16(v_g1, 8)); + __m128i layer4_chunk2 = _mm_packus_epi16(_mm_and_si128(v_b0, v_mask), _mm_and_si128(v_b1, v_mask)); + __m128i layer4_chunk6 = _mm_packus_epi16(_mm_srli_epi16(v_b0, 8), _mm_srli_epi16(v_b1, 8)); + __m128i layer4_chunk3 = _mm_packus_epi16(_mm_and_si128(v_a0, v_mask), _mm_and_si128(v_a1, v_mask)); + __m128i layer4_chunk7 = _mm_packus_epi16(_mm_srli_epi16(v_a0, 8), _mm_srli_epi16(v_a1, 8)); + + __m128i layer3_chunk0 = _mm_packus_epi16(_mm_and_si128(layer4_chunk0, v_mask), _mm_and_si128(layer4_chunk1, v_mask)); + __m128i layer3_chunk4 = _mm_packus_epi16(_mm_srli_epi16(layer4_chunk0, 8), _mm_srli_epi16(layer4_chunk1, 8)); + __m128i layer3_chunk1 = _mm_packus_epi16(_mm_and_si128(layer4_chunk2, v_mask), _mm_and_si128(layer4_chunk3, v_mask)); + __m128i layer3_chunk5 = _mm_packus_epi16(_mm_srli_epi16(layer4_chunk2, 8), _mm_srli_epi16(layer4_chunk3, 8)); + __m128i layer3_chunk2 = _mm_packus_epi16(_mm_and_si128(layer4_chunk4, v_mask), _mm_and_si128(layer4_chunk5, v_mask)); + __m128i layer3_chunk6 = _mm_packus_epi16(_mm_srli_epi16(layer4_chunk4, 8), _mm_srli_epi16(layer4_chunk5, 8)); + __m128i layer3_chunk3 = _mm_packus_epi16(_mm_and_si128(layer4_chunk6, v_mask), _mm_and_si128(layer4_chunk7, v_mask)); + __m128i layer3_chunk7 = _mm_packus_epi16(_mm_srli_epi16(layer4_chunk6, 8), _mm_srli_epi16(layer4_chunk7, 8)); + + __m128i layer2_chunk0 = _mm_packus_epi16(_mm_and_si128(layer3_chunk0, v_mask), _mm_and_si128(layer3_chunk1, v_mask)); + __m128i layer2_chunk4 = _mm_packus_epi16(_mm_srli_epi16(layer3_chunk0, 8), _mm_srli_epi16(layer3_chunk1, 8)); + __m128i layer2_chunk1 = _mm_packus_epi16(_mm_and_si128(layer3_chunk2, v_mask), _mm_and_si128(layer3_chunk3, v_mask)); + __m128i layer2_chunk5 = _mm_packus_epi16(_mm_srli_epi16(layer3_chunk2, 8), _mm_srli_epi16(layer3_chunk3, 8)); + __m128i layer2_chunk2 = _mm_packus_epi16(_mm_and_si128(layer3_chunk4, v_mask), _mm_and_si128(layer3_chunk5, v_mask)); + __m128i layer2_chunk6 = _mm_packus_epi16(_mm_srli_epi16(layer3_chunk4, 8), _mm_srli_epi16(layer3_chunk5, 8)); + __m128i layer2_chunk3 = _mm_packus_epi16(_mm_and_si128(layer3_chunk6, v_mask), _mm_and_si128(layer3_chunk7, v_mask)); + __m128i layer2_chunk7 = _mm_packus_epi16(_mm_srli_epi16(layer3_chunk6, 8), _mm_srli_epi16(layer3_chunk7, 8)); + + __m128i layer1_chunk0 = _mm_packus_epi16(_mm_and_si128(layer2_chunk0, v_mask), _mm_and_si128(layer2_chunk1, v_mask)); + __m128i layer1_chunk4 = _mm_packus_epi16(_mm_srli_epi16(layer2_chunk0, 8), _mm_srli_epi16(layer2_chunk1, 8)); + __m128i layer1_chunk1 = _mm_packus_epi16(_mm_and_si128(layer2_chunk2, v_mask), _mm_and_si128(layer2_chunk3, v_mask)); + __m128i layer1_chunk5 = _mm_packus_epi16(_mm_srli_epi16(layer2_chunk2, 8), _mm_srli_epi16(layer2_chunk3, 8)); + __m128i layer1_chunk2 = _mm_packus_epi16(_mm_and_si128(layer2_chunk4, v_mask), _mm_and_si128(layer2_chunk5, v_mask)); + __m128i layer1_chunk6 = _mm_packus_epi16(_mm_srli_epi16(layer2_chunk4, 8), _mm_srli_epi16(layer2_chunk5, 8)); + __m128i layer1_chunk3 = _mm_packus_epi16(_mm_and_si128(layer2_chunk6, v_mask), _mm_and_si128(layer2_chunk7, v_mask)); + __m128i layer1_chunk7 = _mm_packus_epi16(_mm_srli_epi16(layer2_chunk6, 8), _mm_srli_epi16(layer2_chunk7, 8)); + + v_r0 = _mm_packus_epi16(_mm_and_si128(layer1_chunk0, v_mask), _mm_and_si128(layer1_chunk1, v_mask)); + v_b0 = _mm_packus_epi16(_mm_srli_epi16(layer1_chunk0, 8), _mm_srli_epi16(layer1_chunk1, 8)); + v_r1 = _mm_packus_epi16(_mm_and_si128(layer1_chunk2, v_mask), _mm_and_si128(layer1_chunk3, v_mask)); + v_b1 = _mm_packus_epi16(_mm_srli_epi16(layer1_chunk2, 8), _mm_srli_epi16(layer1_chunk3, 8)); + v_g0 = _mm_packus_epi16(_mm_and_si128(layer1_chunk4, v_mask), _mm_and_si128(layer1_chunk5, v_mask)); + v_a0 = _mm_packus_epi16(_mm_srli_epi16(layer1_chunk4, 8), _mm_srli_epi16(layer1_chunk5, 8)); + v_g1 = _mm_packus_epi16(_mm_and_si128(layer1_chunk6, v_mask), _mm_and_si128(layer1_chunk7, v_mask)); + v_a1 = _mm_packus_epi16(_mm_srli_epi16(layer1_chunk6, 8), _mm_srli_epi16(layer1_chunk7, 8)); +} + +inline void _mm_deinterleave_epi16(__m128i & v_r0, __m128i & v_r1, __m128i & v_g0, __m128i & v_g1) +{ + __m128i layer1_chunk0 = _mm_unpacklo_epi16(v_r0, v_g0); + __m128i layer1_chunk1 = _mm_unpackhi_epi16(v_r0, v_g0); + __m128i layer1_chunk2 = _mm_unpacklo_epi16(v_r1, v_g1); + __m128i layer1_chunk3 = _mm_unpackhi_epi16(v_r1, v_g1); + + __m128i layer2_chunk0 = _mm_unpacklo_epi16(layer1_chunk0, layer1_chunk2); + __m128i layer2_chunk1 = _mm_unpackhi_epi16(layer1_chunk0, layer1_chunk2); + __m128i layer2_chunk2 = _mm_unpacklo_epi16(layer1_chunk1, layer1_chunk3); + __m128i layer2_chunk3 = _mm_unpackhi_epi16(layer1_chunk1, layer1_chunk3); + + __m128i layer3_chunk0 = _mm_unpacklo_epi16(layer2_chunk0, layer2_chunk2); + __m128i layer3_chunk1 = _mm_unpackhi_epi16(layer2_chunk0, layer2_chunk2); + __m128i layer3_chunk2 = _mm_unpacklo_epi16(layer2_chunk1, layer2_chunk3); + __m128i layer3_chunk3 = _mm_unpackhi_epi16(layer2_chunk1, layer2_chunk3); + + v_r0 = _mm_unpacklo_epi16(layer3_chunk0, layer3_chunk2); + v_r1 = _mm_unpackhi_epi16(layer3_chunk0, layer3_chunk2); + v_g0 = _mm_unpacklo_epi16(layer3_chunk1, layer3_chunk3); + v_g1 = _mm_unpackhi_epi16(layer3_chunk1, layer3_chunk3); +} + +inline void _mm_deinterleave_epi16(__m128i & v_r0, __m128i & v_r1, __m128i & v_g0, + __m128i & v_g1, __m128i & v_b0, __m128i & v_b1) +{ + __m128i layer1_chunk0 = _mm_unpacklo_epi16(v_r0, v_g1); + __m128i layer1_chunk1 = _mm_unpackhi_epi16(v_r0, v_g1); + __m128i layer1_chunk2 = _mm_unpacklo_epi16(v_r1, v_b0); + __m128i layer1_chunk3 = _mm_unpackhi_epi16(v_r1, v_b0); + __m128i layer1_chunk4 = _mm_unpacklo_epi16(v_g0, v_b1); + __m128i layer1_chunk5 = _mm_unpackhi_epi16(v_g0, v_b1); + + __m128i layer2_chunk0 = _mm_unpacklo_epi16(layer1_chunk0, layer1_chunk3); + __m128i layer2_chunk1 = _mm_unpackhi_epi16(layer1_chunk0, layer1_chunk3); + __m128i layer2_chunk2 = _mm_unpacklo_epi16(layer1_chunk1, layer1_chunk4); + __m128i layer2_chunk3 = _mm_unpackhi_epi16(layer1_chunk1, layer1_chunk4); + __m128i layer2_chunk4 = _mm_unpacklo_epi16(layer1_chunk2, layer1_chunk5); + __m128i layer2_chunk5 = _mm_unpackhi_epi16(layer1_chunk2, layer1_chunk5); + + __m128i layer3_chunk0 = _mm_unpacklo_epi16(layer2_chunk0, layer2_chunk3); + __m128i layer3_chunk1 = _mm_unpackhi_epi16(layer2_chunk0, layer2_chunk3); + __m128i layer3_chunk2 = _mm_unpacklo_epi16(layer2_chunk1, layer2_chunk4); + __m128i layer3_chunk3 = _mm_unpackhi_epi16(layer2_chunk1, layer2_chunk4); + __m128i layer3_chunk4 = _mm_unpacklo_epi16(layer2_chunk2, layer2_chunk5); + __m128i layer3_chunk5 = _mm_unpackhi_epi16(layer2_chunk2, layer2_chunk5); + + v_r0 = _mm_unpacklo_epi16(layer3_chunk0, layer3_chunk3); + v_r1 = _mm_unpackhi_epi16(layer3_chunk0, layer3_chunk3); + v_g0 = _mm_unpacklo_epi16(layer3_chunk1, layer3_chunk4); + v_g1 = _mm_unpackhi_epi16(layer3_chunk1, layer3_chunk4); + v_b0 = _mm_unpacklo_epi16(layer3_chunk2, layer3_chunk5); + v_b1 = _mm_unpackhi_epi16(layer3_chunk2, layer3_chunk5); +} + +inline void _mm_deinterleave_epi16(__m128i & v_r0, __m128i & v_r1, __m128i & v_g0, __m128i & v_g1, + __m128i & v_b0, __m128i & v_b1, __m128i & v_a0, __m128i & v_a1) +{ + __m128i layer1_chunk0 = _mm_unpacklo_epi16(v_r0, v_b0); + __m128i layer1_chunk1 = _mm_unpackhi_epi16(v_r0, v_b0); + __m128i layer1_chunk2 = _mm_unpacklo_epi16(v_r1, v_b1); + __m128i layer1_chunk3 = _mm_unpackhi_epi16(v_r1, v_b1); + __m128i layer1_chunk4 = _mm_unpacklo_epi16(v_g0, v_a0); + __m128i layer1_chunk5 = _mm_unpackhi_epi16(v_g0, v_a0); + __m128i layer1_chunk6 = _mm_unpacklo_epi16(v_g1, v_a1); + __m128i layer1_chunk7 = _mm_unpackhi_epi16(v_g1, v_a1); + + __m128i layer2_chunk0 = _mm_unpacklo_epi16(layer1_chunk0, layer1_chunk4); + __m128i layer2_chunk1 = _mm_unpackhi_epi16(layer1_chunk0, layer1_chunk4); + __m128i layer2_chunk2 = _mm_unpacklo_epi16(layer1_chunk1, layer1_chunk5); + __m128i layer2_chunk3 = _mm_unpackhi_epi16(layer1_chunk1, layer1_chunk5); + __m128i layer2_chunk4 = _mm_unpacklo_epi16(layer1_chunk2, layer1_chunk6); + __m128i layer2_chunk5 = _mm_unpackhi_epi16(layer1_chunk2, layer1_chunk6); + __m128i layer2_chunk6 = _mm_unpacklo_epi16(layer1_chunk3, layer1_chunk7); + __m128i layer2_chunk7 = _mm_unpackhi_epi16(layer1_chunk3, layer1_chunk7); + + __m128i layer3_chunk0 = _mm_unpacklo_epi16(layer2_chunk0, layer2_chunk4); + __m128i layer3_chunk1 = _mm_unpackhi_epi16(layer2_chunk0, layer2_chunk4); + __m128i layer3_chunk2 = _mm_unpacklo_epi16(layer2_chunk1, layer2_chunk5); + __m128i layer3_chunk3 = _mm_unpackhi_epi16(layer2_chunk1, layer2_chunk5); + __m128i layer3_chunk4 = _mm_unpacklo_epi16(layer2_chunk2, layer2_chunk6); + __m128i layer3_chunk5 = _mm_unpackhi_epi16(layer2_chunk2, layer2_chunk6); + __m128i layer3_chunk6 = _mm_unpacklo_epi16(layer2_chunk3, layer2_chunk7); + __m128i layer3_chunk7 = _mm_unpackhi_epi16(layer2_chunk3, layer2_chunk7); + + v_r0 = _mm_unpacklo_epi16(layer3_chunk0, layer3_chunk4); + v_r1 = _mm_unpackhi_epi16(layer3_chunk0, layer3_chunk4); + v_g0 = _mm_unpacklo_epi16(layer3_chunk1, layer3_chunk5); + v_g1 = _mm_unpackhi_epi16(layer3_chunk1, layer3_chunk5); + v_b0 = _mm_unpacklo_epi16(layer3_chunk2, layer3_chunk6); + v_b1 = _mm_unpackhi_epi16(layer3_chunk2, layer3_chunk6); + v_a0 = _mm_unpacklo_epi16(layer3_chunk3, layer3_chunk7); + v_a1 = _mm_unpackhi_epi16(layer3_chunk3, layer3_chunk7); +} + +#if CV_SSE4_1 + +inline void _mm_interleave_epi16(__m128i & v_r0, __m128i & v_r1, __m128i & v_g0, __m128i & v_g1) +{ + __m128i v_mask = _mm_set1_epi32(0x0000ffff); + + __m128i layer3_chunk0 = _mm_packus_epi32(_mm_and_si128(v_r0, v_mask), _mm_and_si128(v_r1, v_mask)); + __m128i layer3_chunk2 = _mm_packus_epi32(_mm_srli_epi32(v_r0, 16), _mm_srli_epi32(v_r1, 16)); + __m128i layer3_chunk1 = _mm_packus_epi32(_mm_and_si128(v_g0, v_mask), _mm_and_si128(v_g1, v_mask)); + __m128i layer3_chunk3 = _mm_packus_epi32(_mm_srli_epi32(v_g0, 16), _mm_srli_epi32(v_g1, 16)); + + __m128i layer2_chunk0 = _mm_packus_epi32(_mm_and_si128(layer3_chunk0, v_mask), _mm_and_si128(layer3_chunk1, v_mask)); + __m128i layer2_chunk2 = _mm_packus_epi32(_mm_srli_epi32(layer3_chunk0, 16), _mm_srli_epi32(layer3_chunk1, 16)); + __m128i layer2_chunk1 = _mm_packus_epi32(_mm_and_si128(layer3_chunk2, v_mask), _mm_and_si128(layer3_chunk3, v_mask)); + __m128i layer2_chunk3 = _mm_packus_epi32(_mm_srli_epi32(layer3_chunk2, 16), _mm_srli_epi32(layer3_chunk3, 16)); + + __m128i layer1_chunk0 = _mm_packus_epi32(_mm_and_si128(layer2_chunk0, v_mask), _mm_and_si128(layer2_chunk1, v_mask)); + __m128i layer1_chunk2 = _mm_packus_epi32(_mm_srli_epi32(layer2_chunk0, 16), _mm_srli_epi32(layer2_chunk1, 16)); + __m128i layer1_chunk1 = _mm_packus_epi32(_mm_and_si128(layer2_chunk2, v_mask), _mm_and_si128(layer2_chunk3, v_mask)); + __m128i layer1_chunk3 = _mm_packus_epi32(_mm_srli_epi32(layer2_chunk2, 16), _mm_srli_epi32(layer2_chunk3, 16)); + + v_r0 = _mm_packus_epi32(_mm_and_si128(layer1_chunk0, v_mask), _mm_and_si128(layer1_chunk1, v_mask)); + v_g0 = _mm_packus_epi32(_mm_srli_epi32(layer1_chunk0, 16), _mm_srli_epi32(layer1_chunk1, 16)); + v_r1 = _mm_packus_epi32(_mm_and_si128(layer1_chunk2, v_mask), _mm_and_si128(layer1_chunk3, v_mask)); + v_g1 = _mm_packus_epi32(_mm_srli_epi32(layer1_chunk2, 16), _mm_srli_epi32(layer1_chunk3, 16)); +} + +inline void _mm_interleave_epi16(__m128i & v_r0, __m128i & v_r1, __m128i & v_g0, + __m128i & v_g1, __m128i & v_b0, __m128i & v_b1) +{ + __m128i v_mask = _mm_set1_epi32(0x0000ffff); + + __m128i layer3_chunk0 = _mm_packus_epi32(_mm_and_si128(v_r0, v_mask), _mm_and_si128(v_r1, v_mask)); + __m128i layer3_chunk3 = _mm_packus_epi32(_mm_srli_epi32(v_r0, 16), _mm_srli_epi32(v_r1, 16)); + __m128i layer3_chunk1 = _mm_packus_epi32(_mm_and_si128(v_g0, v_mask), _mm_and_si128(v_g1, v_mask)); + __m128i layer3_chunk4 = _mm_packus_epi32(_mm_srli_epi32(v_g0, 16), _mm_srli_epi32(v_g1, 16)); + __m128i layer3_chunk2 = _mm_packus_epi32(_mm_and_si128(v_b0, v_mask), _mm_and_si128(v_b1, v_mask)); + __m128i layer3_chunk5 = _mm_packus_epi32(_mm_srli_epi32(v_b0, 16), _mm_srli_epi32(v_b1, 16)); + + __m128i layer2_chunk0 = _mm_packus_epi32(_mm_and_si128(layer3_chunk0, v_mask), _mm_and_si128(layer3_chunk1, v_mask)); + __m128i layer2_chunk3 = _mm_packus_epi32(_mm_srli_epi32(layer3_chunk0, 16), _mm_srli_epi32(layer3_chunk1, 16)); + __m128i layer2_chunk1 = _mm_packus_epi32(_mm_and_si128(layer3_chunk2, v_mask), _mm_and_si128(layer3_chunk3, v_mask)); + __m128i layer2_chunk4 = _mm_packus_epi32(_mm_srli_epi32(layer3_chunk2, 16), _mm_srli_epi32(layer3_chunk3, 16)); + __m128i layer2_chunk2 = _mm_packus_epi32(_mm_and_si128(layer3_chunk4, v_mask), _mm_and_si128(layer3_chunk5, v_mask)); + __m128i layer2_chunk5 = _mm_packus_epi32(_mm_srli_epi32(layer3_chunk4, 16), _mm_srli_epi32(layer3_chunk5, 16)); + + __m128i layer1_chunk0 = _mm_packus_epi32(_mm_and_si128(layer2_chunk0, v_mask), _mm_and_si128(layer2_chunk1, v_mask)); + __m128i layer1_chunk3 = _mm_packus_epi32(_mm_srli_epi32(layer2_chunk0, 16), _mm_srli_epi32(layer2_chunk1, 16)); + __m128i layer1_chunk1 = _mm_packus_epi32(_mm_and_si128(layer2_chunk2, v_mask), _mm_and_si128(layer2_chunk3, v_mask)); + __m128i layer1_chunk4 = _mm_packus_epi32(_mm_srli_epi32(layer2_chunk2, 16), _mm_srli_epi32(layer2_chunk3, 16)); + __m128i layer1_chunk2 = _mm_packus_epi32(_mm_and_si128(layer2_chunk4, v_mask), _mm_and_si128(layer2_chunk5, v_mask)); + __m128i layer1_chunk5 = _mm_packus_epi32(_mm_srli_epi32(layer2_chunk4, 16), _mm_srli_epi32(layer2_chunk5, 16)); + + v_r0 = _mm_packus_epi32(_mm_and_si128(layer1_chunk0, v_mask), _mm_and_si128(layer1_chunk1, v_mask)); + v_g1 = _mm_packus_epi32(_mm_srli_epi32(layer1_chunk0, 16), _mm_srli_epi32(layer1_chunk1, 16)); + v_r1 = _mm_packus_epi32(_mm_and_si128(layer1_chunk2, v_mask), _mm_and_si128(layer1_chunk3, v_mask)); + v_b0 = _mm_packus_epi32(_mm_srli_epi32(layer1_chunk2, 16), _mm_srli_epi32(layer1_chunk3, 16)); + v_g0 = _mm_packus_epi32(_mm_and_si128(layer1_chunk4, v_mask), _mm_and_si128(layer1_chunk5, v_mask)); + v_b1 = _mm_packus_epi32(_mm_srli_epi32(layer1_chunk4, 16), _mm_srli_epi32(layer1_chunk5, 16)); +} + +inline void _mm_interleave_epi16(__m128i & v_r0, __m128i & v_r1, __m128i & v_g0, __m128i & v_g1, + __m128i & v_b0, __m128i & v_b1, __m128i & v_a0, __m128i & v_a1) +{ + __m128i v_mask = _mm_set1_epi32(0x0000ffff); + + __m128i layer3_chunk0 = _mm_packus_epi32(_mm_and_si128(v_r0, v_mask), _mm_and_si128(v_r1, v_mask)); + __m128i layer3_chunk4 = _mm_packus_epi32(_mm_srli_epi32(v_r0, 16), _mm_srli_epi32(v_r1, 16)); + __m128i layer3_chunk1 = _mm_packus_epi32(_mm_and_si128(v_g0, v_mask), _mm_and_si128(v_g1, v_mask)); + __m128i layer3_chunk5 = _mm_packus_epi32(_mm_srli_epi32(v_g0, 16), _mm_srli_epi32(v_g1, 16)); + __m128i layer3_chunk2 = _mm_packus_epi32(_mm_and_si128(v_b0, v_mask), _mm_and_si128(v_b1, v_mask)); + __m128i layer3_chunk6 = _mm_packus_epi32(_mm_srli_epi32(v_b0, 16), _mm_srli_epi32(v_b1, 16)); + __m128i layer3_chunk3 = _mm_packus_epi32(_mm_and_si128(v_a0, v_mask), _mm_and_si128(v_a1, v_mask)); + __m128i layer3_chunk7 = _mm_packus_epi32(_mm_srli_epi32(v_a0, 16), _mm_srli_epi32(v_a1, 16)); + + __m128i layer2_chunk0 = _mm_packus_epi32(_mm_and_si128(layer3_chunk0, v_mask), _mm_and_si128(layer3_chunk1, v_mask)); + __m128i layer2_chunk4 = _mm_packus_epi32(_mm_srli_epi32(layer3_chunk0, 16), _mm_srli_epi32(layer3_chunk1, 16)); + __m128i layer2_chunk1 = _mm_packus_epi32(_mm_and_si128(layer3_chunk2, v_mask), _mm_and_si128(layer3_chunk3, v_mask)); + __m128i layer2_chunk5 = _mm_packus_epi32(_mm_srli_epi32(layer3_chunk2, 16), _mm_srli_epi32(layer3_chunk3, 16)); + __m128i layer2_chunk2 = _mm_packus_epi32(_mm_and_si128(layer3_chunk4, v_mask), _mm_and_si128(layer3_chunk5, v_mask)); + __m128i layer2_chunk6 = _mm_packus_epi32(_mm_srli_epi32(layer3_chunk4, 16), _mm_srli_epi32(layer3_chunk5, 16)); + __m128i layer2_chunk3 = _mm_packus_epi32(_mm_and_si128(layer3_chunk6, v_mask), _mm_and_si128(layer3_chunk7, v_mask)); + __m128i layer2_chunk7 = _mm_packus_epi32(_mm_srli_epi32(layer3_chunk6, 16), _mm_srli_epi32(layer3_chunk7, 16)); + + __m128i layer1_chunk0 = _mm_packus_epi32(_mm_and_si128(layer2_chunk0, v_mask), _mm_and_si128(layer2_chunk1, v_mask)); + __m128i layer1_chunk4 = _mm_packus_epi32(_mm_srli_epi32(layer2_chunk0, 16), _mm_srli_epi32(layer2_chunk1, 16)); + __m128i layer1_chunk1 = _mm_packus_epi32(_mm_and_si128(layer2_chunk2, v_mask), _mm_and_si128(layer2_chunk3, v_mask)); + __m128i layer1_chunk5 = _mm_packus_epi32(_mm_srli_epi32(layer2_chunk2, 16), _mm_srli_epi32(layer2_chunk3, 16)); + __m128i layer1_chunk2 = _mm_packus_epi32(_mm_and_si128(layer2_chunk4, v_mask), _mm_and_si128(layer2_chunk5, v_mask)); + __m128i layer1_chunk6 = _mm_packus_epi32(_mm_srli_epi32(layer2_chunk4, 16), _mm_srli_epi32(layer2_chunk5, 16)); + __m128i layer1_chunk3 = _mm_packus_epi32(_mm_and_si128(layer2_chunk6, v_mask), _mm_and_si128(layer2_chunk7, v_mask)); + __m128i layer1_chunk7 = _mm_packus_epi32(_mm_srli_epi32(layer2_chunk6, 16), _mm_srli_epi32(layer2_chunk7, 16)); + + v_r0 = _mm_packus_epi32(_mm_and_si128(layer1_chunk0, v_mask), _mm_and_si128(layer1_chunk1, v_mask)); + v_b0 = _mm_packus_epi32(_mm_srli_epi32(layer1_chunk0, 16), _mm_srli_epi32(layer1_chunk1, 16)); + v_r1 = _mm_packus_epi32(_mm_and_si128(layer1_chunk2, v_mask), _mm_and_si128(layer1_chunk3, v_mask)); + v_b1 = _mm_packus_epi32(_mm_srli_epi32(layer1_chunk2, 16), _mm_srli_epi32(layer1_chunk3, 16)); + v_g0 = _mm_packus_epi32(_mm_and_si128(layer1_chunk4, v_mask), _mm_and_si128(layer1_chunk5, v_mask)); + v_a0 = _mm_packus_epi32(_mm_srli_epi32(layer1_chunk4, 16), _mm_srli_epi32(layer1_chunk5, 16)); + v_g1 = _mm_packus_epi32(_mm_and_si128(layer1_chunk6, v_mask), _mm_and_si128(layer1_chunk7, v_mask)); + v_a1 = _mm_packus_epi32(_mm_srli_epi32(layer1_chunk6, 16), _mm_srli_epi32(layer1_chunk7, 16)); +} + +#endif // CV_SSE4_1 + +inline void _mm_deinterleave_ps(__m128 & v_r0, __m128 & v_r1, __m128 & v_g0, __m128 & v_g1) +{ + __m128 layer1_chunk0 = _mm_unpacklo_ps(v_r0, v_g0); + __m128 layer1_chunk1 = _mm_unpackhi_ps(v_r0, v_g0); + __m128 layer1_chunk2 = _mm_unpacklo_ps(v_r1, v_g1); + __m128 layer1_chunk3 = _mm_unpackhi_ps(v_r1, v_g1); + + __m128 layer2_chunk0 = _mm_unpacklo_ps(layer1_chunk0, layer1_chunk2); + __m128 layer2_chunk1 = _mm_unpackhi_ps(layer1_chunk0, layer1_chunk2); + __m128 layer2_chunk2 = _mm_unpacklo_ps(layer1_chunk1, layer1_chunk3); + __m128 layer2_chunk3 = _mm_unpackhi_ps(layer1_chunk1, layer1_chunk3); + + v_r0 = _mm_unpacklo_ps(layer2_chunk0, layer2_chunk2); + v_r1 = _mm_unpackhi_ps(layer2_chunk0, layer2_chunk2); + v_g0 = _mm_unpacklo_ps(layer2_chunk1, layer2_chunk3); + v_g1 = _mm_unpackhi_ps(layer2_chunk1, layer2_chunk3); +} + +inline void _mm_deinterleave_ps(__m128 & v_r0, __m128 & v_r1, __m128 & v_g0, + __m128 & v_g1, __m128 & v_b0, __m128 & v_b1) +{ + __m128 layer1_chunk0 = _mm_unpacklo_ps(v_r0, v_g1); + __m128 layer1_chunk1 = _mm_unpackhi_ps(v_r0, v_g1); + __m128 layer1_chunk2 = _mm_unpacklo_ps(v_r1, v_b0); + __m128 layer1_chunk3 = _mm_unpackhi_ps(v_r1, v_b0); + __m128 layer1_chunk4 = _mm_unpacklo_ps(v_g0, v_b1); + __m128 layer1_chunk5 = _mm_unpackhi_ps(v_g0, v_b1); + + __m128 layer2_chunk0 = _mm_unpacklo_ps(layer1_chunk0, layer1_chunk3); + __m128 layer2_chunk1 = _mm_unpackhi_ps(layer1_chunk0, layer1_chunk3); + __m128 layer2_chunk2 = _mm_unpacklo_ps(layer1_chunk1, layer1_chunk4); + __m128 layer2_chunk3 = _mm_unpackhi_ps(layer1_chunk1, layer1_chunk4); + __m128 layer2_chunk4 = _mm_unpacklo_ps(layer1_chunk2, layer1_chunk5); + __m128 layer2_chunk5 = _mm_unpackhi_ps(layer1_chunk2, layer1_chunk5); + + v_r0 = _mm_unpacklo_ps(layer2_chunk0, layer2_chunk3); + v_r1 = _mm_unpackhi_ps(layer2_chunk0, layer2_chunk3); + v_g0 = _mm_unpacklo_ps(layer2_chunk1, layer2_chunk4); + v_g1 = _mm_unpackhi_ps(layer2_chunk1, layer2_chunk4); + v_b0 = _mm_unpacklo_ps(layer2_chunk2, layer2_chunk5); + v_b1 = _mm_unpackhi_ps(layer2_chunk2, layer2_chunk5); +} + +inline void _mm_deinterleave_ps(__m128 & v_r0, __m128 & v_r1, __m128 & v_g0, __m128 & v_g1, + __m128 & v_b0, __m128 & v_b1, __m128 & v_a0, __m128 & v_a1) +{ + __m128 layer1_chunk0 = _mm_unpacklo_ps(v_r0, v_b0); + __m128 layer1_chunk1 = _mm_unpackhi_ps(v_r0, v_b0); + __m128 layer1_chunk2 = _mm_unpacklo_ps(v_r1, v_b1); + __m128 layer1_chunk3 = _mm_unpackhi_ps(v_r1, v_b1); + __m128 layer1_chunk4 = _mm_unpacklo_ps(v_g0, v_a0); + __m128 layer1_chunk5 = _mm_unpackhi_ps(v_g0, v_a0); + __m128 layer1_chunk6 = _mm_unpacklo_ps(v_g1, v_a1); + __m128 layer1_chunk7 = _mm_unpackhi_ps(v_g1, v_a1); + + __m128 layer2_chunk0 = _mm_unpacklo_ps(layer1_chunk0, layer1_chunk4); + __m128 layer2_chunk1 = _mm_unpackhi_ps(layer1_chunk0, layer1_chunk4); + __m128 layer2_chunk2 = _mm_unpacklo_ps(layer1_chunk1, layer1_chunk5); + __m128 layer2_chunk3 = _mm_unpackhi_ps(layer1_chunk1, layer1_chunk5); + __m128 layer2_chunk4 = _mm_unpacklo_ps(layer1_chunk2, layer1_chunk6); + __m128 layer2_chunk5 = _mm_unpackhi_ps(layer1_chunk2, layer1_chunk6); + __m128 layer2_chunk6 = _mm_unpacklo_ps(layer1_chunk3, layer1_chunk7); + __m128 layer2_chunk7 = _mm_unpackhi_ps(layer1_chunk3, layer1_chunk7); + + v_r0 = _mm_unpacklo_ps(layer2_chunk0, layer2_chunk4); + v_r1 = _mm_unpackhi_ps(layer2_chunk0, layer2_chunk4); + v_g0 = _mm_unpacklo_ps(layer2_chunk1, layer2_chunk5); + v_g1 = _mm_unpackhi_ps(layer2_chunk1, layer2_chunk5); + v_b0 = _mm_unpacklo_ps(layer2_chunk2, layer2_chunk6); + v_b1 = _mm_unpackhi_ps(layer2_chunk2, layer2_chunk6); + v_a0 = _mm_unpacklo_ps(layer2_chunk3, layer2_chunk7); + v_a1 = _mm_unpackhi_ps(layer2_chunk3, layer2_chunk7); +} + +inline void _mm_interleave_ps(__m128 & v_r0, __m128 & v_r1, __m128 & v_g0, __m128 & v_g1) +{ + enum { mask_lo = _MM_SHUFFLE(2, 0, 2, 0), mask_hi = _MM_SHUFFLE(3, 1, 3, 1) }; + + __m128 layer2_chunk0 = _mm_shuffle_ps(v_r0, v_r1, mask_lo); + __m128 layer2_chunk2 = _mm_shuffle_ps(v_r0, v_r1, mask_hi); + __m128 layer2_chunk1 = _mm_shuffle_ps(v_g0, v_g1, mask_lo); + __m128 layer2_chunk3 = _mm_shuffle_ps(v_g0, v_g1, mask_hi); + + __m128 layer1_chunk0 = _mm_shuffle_ps(layer2_chunk0, layer2_chunk1, mask_lo); + __m128 layer1_chunk2 = _mm_shuffle_ps(layer2_chunk0, layer2_chunk1, mask_hi); + __m128 layer1_chunk1 = _mm_shuffle_ps(layer2_chunk2, layer2_chunk3, mask_lo); + __m128 layer1_chunk3 = _mm_shuffle_ps(layer2_chunk2, layer2_chunk3, mask_hi); + + v_r0 = _mm_shuffle_ps(layer1_chunk0, layer1_chunk1, mask_lo); + v_g0 = _mm_shuffle_ps(layer1_chunk0, layer1_chunk1, mask_hi); + v_r1 = _mm_shuffle_ps(layer1_chunk2, layer1_chunk3, mask_lo); + v_g1 = _mm_shuffle_ps(layer1_chunk2, layer1_chunk3, mask_hi); +} + +inline void _mm_interleave_ps(__m128 & v_r0, __m128 & v_r1, __m128 & v_g0, + __m128 & v_g1, __m128 & v_b0, __m128 & v_b1) +{ + enum { mask_lo = _MM_SHUFFLE(2, 0, 2, 0), mask_hi = _MM_SHUFFLE(3, 1, 3, 1) }; + + __m128 layer2_chunk0 = _mm_shuffle_ps(v_r0, v_r1, mask_lo); + __m128 layer2_chunk3 = _mm_shuffle_ps(v_r0, v_r1, mask_hi); + __m128 layer2_chunk1 = _mm_shuffle_ps(v_g0, v_g1, mask_lo); + __m128 layer2_chunk4 = _mm_shuffle_ps(v_g0, v_g1, mask_hi); + __m128 layer2_chunk2 = _mm_shuffle_ps(v_b0, v_b1, mask_lo); + __m128 layer2_chunk5 = _mm_shuffle_ps(v_b0, v_b1, mask_hi); + + __m128 layer1_chunk0 = _mm_shuffle_ps(layer2_chunk0, layer2_chunk1, mask_lo); + __m128 layer1_chunk3 = _mm_shuffle_ps(layer2_chunk0, layer2_chunk1, mask_hi); + __m128 layer1_chunk1 = _mm_shuffle_ps(layer2_chunk2, layer2_chunk3, mask_lo); + __m128 layer1_chunk4 = _mm_shuffle_ps(layer2_chunk2, layer2_chunk3, mask_hi); + __m128 layer1_chunk2 = _mm_shuffle_ps(layer2_chunk4, layer2_chunk5, mask_lo); + __m128 layer1_chunk5 = _mm_shuffle_ps(layer2_chunk4, layer2_chunk5, mask_hi); + + v_r0 = _mm_shuffle_ps(layer1_chunk0, layer1_chunk1, mask_lo); + v_g1 = _mm_shuffle_ps(layer1_chunk0, layer1_chunk1, mask_hi); + v_r1 = _mm_shuffle_ps(layer1_chunk2, layer1_chunk3, mask_lo); + v_b0 = _mm_shuffle_ps(layer1_chunk2, layer1_chunk3, mask_hi); + v_g0 = _mm_shuffle_ps(layer1_chunk4, layer1_chunk5, mask_lo); + v_b1 = _mm_shuffle_ps(layer1_chunk4, layer1_chunk5, mask_hi); +} + +inline void _mm_interleave_ps(__m128 & v_r0, __m128 & v_r1, __m128 & v_g0, __m128 & v_g1, + __m128 & v_b0, __m128 & v_b1, __m128 & v_a0, __m128 & v_a1) +{ + enum { mask_lo = _MM_SHUFFLE(2, 0, 2, 0), mask_hi = _MM_SHUFFLE(3, 1, 3, 1) }; + + __m128 layer2_chunk0 = _mm_shuffle_ps(v_r0, v_r1, mask_lo); + __m128 layer2_chunk4 = _mm_shuffle_ps(v_r0, v_r1, mask_hi); + __m128 layer2_chunk1 = _mm_shuffle_ps(v_g0, v_g1, mask_lo); + __m128 layer2_chunk5 = _mm_shuffle_ps(v_g0, v_g1, mask_hi); + __m128 layer2_chunk2 = _mm_shuffle_ps(v_b0, v_b1, mask_lo); + __m128 layer2_chunk6 = _mm_shuffle_ps(v_b0, v_b1, mask_hi); + __m128 layer2_chunk3 = _mm_shuffle_ps(v_a0, v_a1, mask_lo); + __m128 layer2_chunk7 = _mm_shuffle_ps(v_a0, v_a1, mask_hi); + + __m128 layer1_chunk0 = _mm_shuffle_ps(layer2_chunk0, layer2_chunk1, mask_lo); + __m128 layer1_chunk4 = _mm_shuffle_ps(layer2_chunk0, layer2_chunk1, mask_hi); + __m128 layer1_chunk1 = _mm_shuffle_ps(layer2_chunk2, layer2_chunk3, mask_lo); + __m128 layer1_chunk5 = _mm_shuffle_ps(layer2_chunk2, layer2_chunk3, mask_hi); + __m128 layer1_chunk2 = _mm_shuffle_ps(layer2_chunk4, layer2_chunk5, mask_lo); + __m128 layer1_chunk6 = _mm_shuffle_ps(layer2_chunk4, layer2_chunk5, mask_hi); + __m128 layer1_chunk3 = _mm_shuffle_ps(layer2_chunk6, layer2_chunk7, mask_lo); + __m128 layer1_chunk7 = _mm_shuffle_ps(layer2_chunk6, layer2_chunk7, mask_hi); + + v_r0 = _mm_shuffle_ps(layer1_chunk0, layer1_chunk1, mask_lo); + v_b0 = _mm_shuffle_ps(layer1_chunk0, layer1_chunk1, mask_hi); + v_r1 = _mm_shuffle_ps(layer1_chunk2, layer1_chunk3, mask_lo); + v_b1 = _mm_shuffle_ps(layer1_chunk2, layer1_chunk3, mask_hi); + v_g0 = _mm_shuffle_ps(layer1_chunk4, layer1_chunk5, mask_lo); + v_a0 = _mm_shuffle_ps(layer1_chunk4, layer1_chunk5, mask_hi); + v_g1 = _mm_shuffle_ps(layer1_chunk6, layer1_chunk7, mask_lo); + v_a1 = _mm_shuffle_ps(layer1_chunk6, layer1_chunk7, mask_hi); +} + +#endif // CV_SSE2 + +//! @} + +#endif //OPENCV_CORE_SSE_UTILS_HPP diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/traits.hpp b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/traits.hpp new file mode 100644 index 0000000..52ab083 --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/traits.hpp @@ -0,0 +1,417 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Copyright (C) 2013, OpenCV Foundation, all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef OPENCV_CORE_TRAITS_HPP +#define OPENCV_CORE_TRAITS_HPP + +#include "opencv2/core/cvdef.h" + +namespace cv +{ + +//#define OPENCV_TRAITS_ENABLE_DEPRECATED + +//! @addtogroup core_basic +//! @{ + +/** @brief Template "trait" class for OpenCV primitive data types. + +@note Deprecated. This is replaced by "single purpose" traits: traits::Type and traits::Depth + +A primitive OpenCV data type is one of unsigned char, bool, signed char, unsigned short, signed +short, int, float, double, or a tuple of values of one of these types, where all the values in the +tuple have the same type. Any primitive type from the list can be defined by an identifier in the +form CV_\{U|S|F}C(\), for example: uchar \~ CV_8UC1, 3-element +floating-point tuple \~ CV_32FC3, and so on. A universal OpenCV structure that is able to store a +single instance of such a primitive data type is Vec. Multiple instances of such a type can be +stored in a std::vector, Mat, Mat_, SparseMat, SparseMat_, or any other container that is able to +store Vec instances. + +The DataType class is basically used to provide a description of such primitive data types without +adding any fields or methods to the corresponding classes (and it is actually impossible to add +anything to primitive C/C++ data types). This technique is known in C++ as class traits. It is not +DataType itself that is used but its specialized versions, such as: +@code + template<> class DataType + { + typedef uchar value_type; + typedef int work_type; + typedef uchar channel_type; + enum { channel_type = CV_8U, channels = 1, fmt='u', type = CV_8U }; + }; + ... + template DataType > + { + typedef std::complex<_Tp> value_type; + typedef std::complex<_Tp> work_type; + typedef _Tp channel_type; + // DataDepth is another helper trait class + enum { depth = DataDepth<_Tp>::value, channels=2, + fmt=(channels-1)*256+DataDepth<_Tp>::fmt, + type=CV_MAKETYPE(depth, channels) }; + }; + ... +@endcode +The main purpose of this class is to convert compilation-time type information to an +OpenCV-compatible data type identifier, for example: +@code + // allocates a 30x40 floating-point matrix + Mat A(30, 40, DataType::type); + + Mat B = Mat_ >(3, 3); + // the statement below will print 6, 2 , that is depth == CV_64F, channels == 2 + cout << B.depth() << ", " << B.channels() << endl; +@endcode +So, such traits are used to tell OpenCV which data type you are working with, even if such a type is +not native to OpenCV. For example, the matrix B initialization above is compiled because OpenCV +defines the proper specialized template class DataType\ \> . This mechanism is also +useful (and used in OpenCV this way) for generic algorithms implementations. + +@note Default values were dropped to stop confusing developers about using of unsupported types (see #7599) +*/ +template class DataType +{ +public: +#ifdef OPENCV_TRAITS_ENABLE_DEPRECATED + typedef _Tp value_type; + typedef value_type work_type; + typedef value_type channel_type; + typedef value_type vec_type; + enum { generic_type = 1, + depth = -1, + channels = 1, + fmt = 0, + type = CV_MAKETYPE(depth, channels) + }; +#endif +}; + +template<> class DataType +{ +public: + typedef bool value_type; + typedef int work_type; + typedef value_type channel_type; + typedef value_type vec_type; + enum { generic_type = 0, + depth = CV_8U, + channels = 1, + fmt = (int)'u', + type = CV_MAKETYPE(depth, channels) + }; +}; + +template<> class DataType +{ +public: + typedef uchar value_type; + typedef int work_type; + typedef value_type channel_type; + typedef value_type vec_type; + enum { generic_type = 0, + depth = CV_8U, + channels = 1, + fmt = (int)'u', + type = CV_MAKETYPE(depth, channels) + }; +}; + +template<> class DataType +{ +public: + typedef schar value_type; + typedef int work_type; + typedef value_type channel_type; + typedef value_type vec_type; + enum { generic_type = 0, + depth = CV_8S, + channels = 1, + fmt = (int)'c', + type = CV_MAKETYPE(depth, channels) + }; +}; + +template<> class DataType +{ +public: + typedef schar value_type; + typedef int work_type; + typedef value_type channel_type; + typedef value_type vec_type; + enum { generic_type = 0, + depth = CV_8S, + channels = 1, + fmt = (int)'c', + type = CV_MAKETYPE(depth, channels) + }; +}; + +template<> class DataType +{ +public: + typedef ushort value_type; + typedef int work_type; + typedef value_type channel_type; + typedef value_type vec_type; + enum { generic_type = 0, + depth = CV_16U, + channels = 1, + fmt = (int)'w', + type = CV_MAKETYPE(depth, channels) + }; +}; + +template<> class DataType +{ +public: + typedef short value_type; + typedef int work_type; + typedef value_type channel_type; + typedef value_type vec_type; + enum { generic_type = 0, + depth = CV_16S, + channels = 1, + fmt = (int)'s', + type = CV_MAKETYPE(depth, channels) + }; +}; + +template<> class DataType +{ +public: + typedef int value_type; + typedef value_type work_type; + typedef value_type channel_type; + typedef value_type vec_type; + enum { generic_type = 0, + depth = CV_32S, + channels = 1, + fmt = (int)'i', + type = CV_MAKETYPE(depth, channels) + }; +}; + +template<> class DataType +{ +public: + typedef float value_type; + typedef value_type work_type; + typedef value_type channel_type; + typedef value_type vec_type; + enum { generic_type = 0, + depth = CV_32F, + channels = 1, + fmt = (int)'f', + type = CV_MAKETYPE(depth, channels) + }; +}; + +template<> class DataType +{ +public: + typedef double value_type; + typedef value_type work_type; + typedef value_type channel_type; + typedef value_type vec_type; + enum { generic_type = 0, + depth = CV_64F, + channels = 1, + fmt = (int)'d', + type = CV_MAKETYPE(depth, channels) + }; +}; + +template<> class DataType +{ +public: + typedef float16_t value_type; + typedef float work_type; + typedef value_type channel_type; + typedef value_type vec_type; + enum { generic_type = 0, + depth = CV_16F, + channels = 1, + fmt = (int)'h', + type = CV_MAKETYPE(depth, channels) + }; +}; + +/** @brief A helper class for cv::DataType + +The class is specialized for each fundamental numerical data type supported by OpenCV. It provides +DataDepth::value constant. +*/ +template class DataDepth +{ +public: + enum + { + value = DataType<_Tp>::depth, + fmt = DataType<_Tp>::fmt + }; +}; + + +#ifdef OPENCV_TRAITS_ENABLE_DEPRECATED + +template class TypeDepth +{ +#ifdef OPENCV_TRAITS_ENABLE_LEGACY_DEFAULTS + enum { depth = CV_USRTYPE1 }; + typedef void value_type; +#endif +}; + +template<> class TypeDepth +{ + enum { depth = CV_8U }; + typedef uchar value_type; +}; + +template<> class TypeDepth +{ + enum { depth = CV_8S }; + typedef schar value_type; +}; + +template<> class TypeDepth +{ + enum { depth = CV_16U }; + typedef ushort value_type; +}; + +template<> class TypeDepth +{ + enum { depth = CV_16S }; + typedef short value_type; +}; + +template<> class TypeDepth +{ + enum { depth = CV_32S }; + typedef int value_type; +}; + +template<> class TypeDepth +{ + enum { depth = CV_32F }; + typedef float value_type; +}; + +template<> class TypeDepth +{ + enum { depth = CV_64F }; + typedef double value_type; +}; + +template<> class TypeDepth +{ + enum { depth = CV_16F }; + typedef float16_t value_type; +}; + +#endif + +//! @} + +namespace traits { + +namespace internal { +#define CV_CREATE_MEMBER_CHECK(X) \ +template class CheckMember_##X { \ + struct Fallback { int X; }; \ + struct Derived : T, Fallback { }; \ + template struct Check; \ + typedef char CV_NO[1]; \ + typedef char CV_YES[2]; \ + template static CV_NO & func(Check *); \ + template static CV_YES & func(...); \ +public: \ + typedef CheckMember_##X type; \ + enum { value = sizeof(func(0)) == sizeof(CV_YES) }; \ +}; + +CV_CREATE_MEMBER_CHECK(fmt) +CV_CREATE_MEMBER_CHECK(type) + +} // namespace internal + + +template +struct Depth +{ enum { value = DataType::depth }; }; + +template +struct Type +{ enum { value = DataType::type }; }; + +/** Similar to traits::Type but has value = -1 in case of unknown type (instead of compiler error) */ +template >::value > +struct SafeType {}; + +template +struct SafeType +{ enum { value = -1 }; }; + +template +struct SafeType +{ enum { value = Type::value }; }; + + +template >::value > +struct SafeFmt {}; + +template +struct SafeFmt +{ enum { fmt = 0 }; }; + +template +struct SafeFmt +{ enum { fmt = DataType::fmt }; }; + + +} // namespace + +} // cv + +#endif // OPENCV_CORE_TRAITS_HPP diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/types.hpp b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/types.hpp new file mode 100644 index 0000000..819fd52 --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/types.hpp @@ -0,0 +1,2476 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Copyright (C) 2013, OpenCV Foundation, all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef OPENCV_CORE_TYPES_HPP +#define OPENCV_CORE_TYPES_HPP + +#ifndef __cplusplus +# error types.hpp header must be compiled as C++ +#endif + +#include +#include +#include +#include + +#include "opencv2/core/cvdef.h" +#include "opencv2/core/cvstd.hpp" +#include "opencv2/core/matx.hpp" + +namespace cv +{ + +//! @addtogroup core_basic +//! @{ + +//////////////////////////////// Complex ////////////////////////////// + +/** @brief A complex number class. + + The template class is similar and compatible with std::complex, however it provides slightly + more convenient access to the real and imaginary parts using through the simple field access, as opposite + to std::complex::real() and std::complex::imag(). +*/ +template class Complex +{ +public: + + //! default constructor + Complex(); + Complex( _Tp _re, _Tp _im = 0 ); + + //! conversion to another data type + template operator Complex() const; + //! conjugation + Complex conj() const; + + _Tp re, im; //< the real and the imaginary parts +}; + +typedef Complex Complexf; +typedef Complex Complexd; + +template class DataType< Complex<_Tp> > +{ +public: + typedef Complex<_Tp> value_type; + typedef value_type work_type; + typedef _Tp channel_type; + + enum { generic_type = 0, + channels = 2, + fmt = DataType::fmt + ((channels - 1) << 8) +#ifdef OPENCV_TRAITS_ENABLE_DEPRECATED + ,depth = DataType::depth + ,type = CV_MAKETYPE(depth, channels) +#endif + }; + + typedef Vec vec_type; +}; + +namespace traits { +template +struct Depth< Complex<_Tp> > { enum { value = Depth<_Tp>::value }; }; +template +struct Type< Complex<_Tp> > { enum { value = CV_MAKETYPE(Depth<_Tp>::value, 2) }; }; +} // namespace + + +//////////////////////////////// Point_ //////////////////////////////// + +/** @brief Template class for 2D points specified by its coordinates `x` and `y`. + +An instance of the class is interchangeable with C structures, CvPoint and CvPoint2D32f . There is +also a cast operator to convert point coordinates to the specified type. The conversion from +floating-point coordinates to integer coordinates is done by rounding. Commonly, the conversion +uses this operation for each of the coordinates. Besides the class members listed in the +declaration above, the following operations on points are implemented: +@code + pt1 = pt2 + pt3; + pt1 = pt2 - pt3; + pt1 = pt2 * a; + pt1 = a * pt2; + pt1 = pt2 / a; + pt1 += pt2; + pt1 -= pt2; + pt1 *= a; + pt1 /= a; + double value = norm(pt); // L2 norm + pt1 == pt2; + pt1 != pt2; +@endcode +For your convenience, the following type aliases are defined: +@code + typedef Point_ Point2i; + typedef Point2i Point; + typedef Point_ Point2f; + typedef Point_ Point2d; +@endcode +Example: +@code + Point2f a(0.3f, 0.f), b(0.f, 0.4f); + Point pt = (a + b)*10.f; + cout << pt.x << ", " << pt.y << endl; +@endcode +*/ +template class Point_ +{ +public: + typedef _Tp value_type; + + //! default constructor + Point_(); + Point_(_Tp _x, _Tp _y); + Point_(const Point_& pt); + Point_(Point_&& pt) CV_NOEXCEPT; + Point_(const Size_<_Tp>& sz); + Point_(const Vec<_Tp, 2>& v); + + Point_& operator = (const Point_& pt); + Point_& operator = (Point_&& pt) CV_NOEXCEPT; + //! conversion to another data type + template operator Point_<_Tp2>() const; + + //! conversion to the old-style C structures + operator Vec<_Tp, 2>() const; + + //! dot product + _Tp dot(const Point_& pt) const; + //! dot product computed in double-precision arithmetics + double ddot(const Point_& pt) const; + //! cross-product + double cross(const Point_& pt) const; + //! checks whether the point is inside the specified rectangle + bool inside(const Rect_<_Tp>& r) const; + _Tp x; //!< x coordinate of the point + _Tp y; //!< y coordinate of the point +}; + +typedef Point_ Point2i; +typedef Point_ Point2l; +typedef Point_ Point2f; +typedef Point_ Point2d; +typedef Point2i Point; + +template class DataType< Point_<_Tp> > +{ +public: + typedef Point_<_Tp> value_type; + typedef Point_::work_type> work_type; + typedef _Tp channel_type; + + enum { generic_type = 0, + channels = 2, + fmt = traits::SafeFmt::fmt + ((channels - 1) << 8) +#ifdef OPENCV_TRAITS_ENABLE_DEPRECATED + ,depth = DataType::depth + ,type = CV_MAKETYPE(depth, channels) +#endif + }; + + typedef Vec vec_type; +}; + +namespace traits { +template +struct Depth< Point_<_Tp> > { enum { value = Depth<_Tp>::value }; }; +template +struct Type< Point_<_Tp> > { enum { value = CV_MAKETYPE(Depth<_Tp>::value, 2) }; }; +} // namespace + + +//////////////////////////////// Point3_ //////////////////////////////// + +/** @brief Template class for 3D points specified by its coordinates `x`, `y` and `z`. + +An instance of the class is interchangeable with the C structure CvPoint2D32f . Similarly to +Point_ , the coordinates of 3D points can be converted to another type. The vector arithmetic and +comparison operations are also supported. + +The following Point3_\<\> aliases are available: +@code + typedef Point3_ Point3i; + typedef Point3_ Point3f; + typedef Point3_ Point3d; +@endcode +@see cv::Point3i, cv::Point3f and cv::Point3d +*/ +template class Point3_ +{ +public: + typedef _Tp value_type; + + //! default constructor + Point3_(); + Point3_(_Tp _x, _Tp _y, _Tp _z); + Point3_(const Point3_& pt); + Point3_(Point3_&& pt) CV_NOEXCEPT; + explicit Point3_(const Point_<_Tp>& pt); + Point3_(const Vec<_Tp, 3>& v); + + Point3_& operator = (const Point3_& pt); + Point3_& operator = (Point3_&& pt) CV_NOEXCEPT; + //! conversion to another data type + template operator Point3_<_Tp2>() const; + //! conversion to cv::Vec<> + operator Vec<_Tp, 3>() const; + + //! dot product + _Tp dot(const Point3_& pt) const; + //! dot product computed in double-precision arithmetics + double ddot(const Point3_& pt) const; + //! cross product of the 2 3D points + Point3_ cross(const Point3_& pt) const; + _Tp x; //!< x coordinate of the 3D point + _Tp y; //!< y coordinate of the 3D point + _Tp z; //!< z coordinate of the 3D point +}; + +typedef Point3_ Point3i; +typedef Point3_ Point3f; +typedef Point3_ Point3d; + +template class DataType< Point3_<_Tp> > +{ +public: + typedef Point3_<_Tp> value_type; + typedef Point3_::work_type> work_type; + typedef _Tp channel_type; + + enum { generic_type = 0, + channels = 3, + fmt = traits::SafeFmt::fmt + ((channels - 1) << 8) +#ifdef OPENCV_TRAITS_ENABLE_DEPRECATED + ,depth = DataType::depth + ,type = CV_MAKETYPE(depth, channels) +#endif + }; + + typedef Vec vec_type; +}; + +namespace traits { +template +struct Depth< Point3_<_Tp> > { enum { value = Depth<_Tp>::value }; }; +template +struct Type< Point3_<_Tp> > { enum { value = CV_MAKETYPE(Depth<_Tp>::value, 3) }; }; +} // namespace + +//////////////////////////////// Size_ //////////////////////////////// + +/** @brief Template class for specifying the size of an image or rectangle. + +The class includes two members called width and height. The structure can be converted to and from +the old OpenCV structures CvSize and CvSize2D32f . The same set of arithmetic and comparison +operations as for Point_ is available. + +OpenCV defines the following Size_\<\> aliases: +@code + typedef Size_ Size2i; + typedef Size2i Size; + typedef Size_ Size2f; +@endcode +*/ +template class Size_ +{ +public: + typedef _Tp value_type; + + //! default constructor + Size_(); + Size_(_Tp _width, _Tp _height); + Size_(const Size_& sz); + Size_(Size_&& sz) CV_NOEXCEPT; + Size_(const Point_<_Tp>& pt); + + Size_& operator = (const Size_& sz); + Size_& operator = (Size_&& sz) CV_NOEXCEPT; + //! the area (width*height) + _Tp area() const; + //! aspect ratio (width/height) + double aspectRatio() const; + //! true if empty + bool empty() const; + + //! conversion of another data type. + template operator Size_<_Tp2>() const; + + _Tp width; //!< the width + _Tp height; //!< the height +}; + +typedef Size_ Size2i; +typedef Size_ Size2l; +typedef Size_ Size2f; +typedef Size_ Size2d; +typedef Size2i Size; + +template class DataType< Size_<_Tp> > +{ +public: + typedef Size_<_Tp> value_type; + typedef Size_::work_type> work_type; + typedef _Tp channel_type; + + enum { generic_type = 0, + channels = 2, + fmt = DataType::fmt + ((channels - 1) << 8) +#ifdef OPENCV_TRAITS_ENABLE_DEPRECATED + ,depth = DataType::depth + ,type = CV_MAKETYPE(depth, channels) +#endif + }; + + typedef Vec vec_type; +}; + +namespace traits { +template +struct Depth< Size_<_Tp> > { enum { value = Depth<_Tp>::value }; }; +template +struct Type< Size_<_Tp> > { enum { value = CV_MAKETYPE(Depth<_Tp>::value, 2) }; }; +} // namespace + +//////////////////////////////// Rect_ //////////////////////////////// + +/** @brief Template class for 2D rectangles + +described by the following parameters: +- Coordinates of the top-left corner. This is a default interpretation of Rect_::x and Rect_::y + in OpenCV. Though, in your algorithms you may count x and y from the bottom-left corner. +- Rectangle width and height. + +OpenCV typically assumes that the top and left boundary of the rectangle are inclusive, while the +right and bottom boundaries are not. For example, the method Rect_::contains returns true if + +\f[x \leq pt.x < x+width, + y \leq pt.y < y+height\f] + +Virtually every loop over an image ROI in OpenCV (where ROI is specified by Rect_\ ) is +implemented as: +@code + for(int y = roi.y; y < roi.y + roi.height; y++) + for(int x = roi.x; x < roi.x + roi.width; x++) + { + // ... + } +@endcode +In addition to the class members, the following operations on rectangles are implemented: +- \f$\texttt{rect} = \texttt{rect} \pm \texttt{point}\f$ (shifting a rectangle by a certain offset) +- \f$\texttt{rect} = \texttt{rect} \pm \texttt{size}\f$ (expanding or shrinking a rectangle by a + certain amount) +- rect += point, rect -= point, rect += size, rect -= size (augmenting operations) +- rect = rect1 & rect2 (rectangle intersection) +- rect = rect1 | rect2 (minimum area rectangle containing rect1 and rect2 ) +- rect &= rect1, rect |= rect1 (and the corresponding augmenting operations) +- rect == rect1, rect != rect1 (rectangle comparison) + +This is an example how the partial ordering on rectangles can be established (rect1 \f$\subseteq\f$ +rect2): +@code + template inline bool + operator <= (const Rect_<_Tp>& r1, const Rect_<_Tp>& r2) + { + return (r1 & r2) == r1; + } +@endcode +For your convenience, the Rect_\<\> alias is available: cv::Rect +*/ +template class Rect_ +{ +public: + typedef _Tp value_type; + + //! default constructor + Rect_(); + Rect_(_Tp _x, _Tp _y, _Tp _width, _Tp _height); + Rect_(const Rect_& r); + Rect_(Rect_&& r) CV_NOEXCEPT; + Rect_(const Point_<_Tp>& org, const Size_<_Tp>& sz); + Rect_(const Point_<_Tp>& pt1, const Point_<_Tp>& pt2); + + Rect_& operator = ( const Rect_& r ); + Rect_& operator = ( Rect_&& r ) CV_NOEXCEPT; + //! the top-left corner + Point_<_Tp> tl() const; + //! the bottom-right corner + Point_<_Tp> br() const; + + //! size (width, height) of the rectangle + Size_<_Tp> size() const; + //! area (width*height) of the rectangle + _Tp area() const; + //! true if empty + bool empty() const; + + //! conversion to another data type + template operator Rect_<_Tp2>() const; + + //! checks whether the rectangle contains the point + bool contains(const Point_<_Tp>& pt) const; + + _Tp x; //!< x coordinate of the top-left corner + _Tp y; //!< y coordinate of the top-left corner + _Tp width; //!< width of the rectangle + _Tp height; //!< height of the rectangle +}; + +typedef Rect_ Rect2i; +typedef Rect_ Rect2f; +typedef Rect_ Rect2d; +typedef Rect2i Rect; + +template class DataType< Rect_<_Tp> > +{ +public: + typedef Rect_<_Tp> value_type; + typedef Rect_::work_type> work_type; + typedef _Tp channel_type; + + enum { generic_type = 0, + channels = 4, + fmt = traits::SafeFmt::fmt + ((channels - 1) << 8) +#ifdef OPENCV_TRAITS_ENABLE_DEPRECATED + ,depth = DataType::depth + ,type = CV_MAKETYPE(depth, channels) +#endif + }; + + typedef Vec vec_type; +}; + +namespace traits { +template +struct Depth< Rect_<_Tp> > { enum { value = Depth<_Tp>::value }; }; +template +struct Type< Rect_<_Tp> > { enum { value = CV_MAKETYPE(Depth<_Tp>::value, 4) }; }; +} // namespace + +///////////////////////////// RotatedRect ///////////////////////////// + +/** @brief The class represents rotated (i.e. not up-right) rectangles on a plane. + +Each rectangle is specified by the center point (mass center), length of each side (represented by +#Size2f structure) and the rotation angle in degrees. + +The sample below demonstrates how to use RotatedRect: +@snippet snippets/core_various.cpp RotatedRect_demo +![image](pics/rotatedrect.png) + +@sa CamShift, fitEllipse, minAreaRect, CvBox2D +*/ +class CV_EXPORTS RotatedRect +{ +public: + //! default constructor + RotatedRect(); + /** full constructor + @param center The rectangle mass center. + @param size Width and height of the rectangle. + @param angle The rotation angle in a clockwise direction. When the angle is 0, 90, 180, 270 etc., + the rectangle becomes an up-right rectangle. + */ + RotatedRect(const Point2f& center, const Size2f& size, float angle); + /** + Any 3 end points of the RotatedRect. They must be given in order (either clockwise or + anticlockwise). + */ + RotatedRect(const Point2f& point1, const Point2f& point2, const Point2f& point3); + + /** returns 4 vertices of the rectangle + @param pts The points array for storing rectangle vertices. The order is bottomLeft, topLeft, topRight, bottomRight. + */ + void points(Point2f pts[]) const; + //! returns the minimal up-right integer rectangle containing the rotated rectangle + Rect boundingRect() const; + //! returns the minimal (exact) floating point rectangle containing the rotated rectangle, not intended for use with images + Rect_ boundingRect2f() const; + //! returns the rectangle mass center + Point2f center; + //! returns width and height of the rectangle + Size2f size; + //! returns the rotation angle. When the angle is 0, 90, 180, 270 etc., the rectangle becomes an up-right rectangle. + float angle; +}; + +template<> class DataType< RotatedRect > +{ +public: + typedef RotatedRect value_type; + typedef value_type work_type; + typedef float channel_type; + + enum { generic_type = 0, + channels = (int)sizeof(value_type)/sizeof(channel_type), // 5 + fmt = traits::SafeFmt::fmt + ((channels - 1) << 8) +#ifdef OPENCV_TRAITS_ENABLE_DEPRECATED + ,depth = DataType::depth + ,type = CV_MAKETYPE(depth, channels) +#endif + }; + + typedef Vec vec_type; +}; + +namespace traits { +template<> +struct Depth< RotatedRect > { enum { value = Depth::value }; }; +template<> +struct Type< RotatedRect > { enum { value = CV_MAKETYPE(Depth::value, (int)sizeof(RotatedRect)/sizeof(float)) }; }; +} // namespace + + +//////////////////////////////// Range ///////////////////////////////// + +/** @brief Template class specifying a continuous subsequence (slice) of a sequence. + +The class is used to specify a row or a column span in a matrix ( Mat ) and for many other purposes. +Range(a,b) is basically the same as a:b in Matlab or a..b in Python. As in Python, start is an +inclusive left boundary of the range and end is an exclusive right boundary of the range. Such a +half-opened interval is usually denoted as \f$[start,end)\f$ . + +The static method Range::all() returns a special variable that means "the whole sequence" or "the +whole range", just like " : " in Matlab or " ... " in Python. All the methods and functions in +OpenCV that take Range support this special Range::all() value. But, of course, in case of your own +custom processing, you will probably have to check and handle it explicitly: +@code + void my_function(..., const Range& r, ....) + { + if(r == Range::all()) { + // process all the data + } + else { + // process [r.start, r.end) + } + } +@endcode +*/ +class CV_EXPORTS Range +{ +public: + Range(); + Range(int _start, int _end); + int size() const; + bool empty() const; + static Range all(); + + int start, end; +}; + +template<> class DataType +{ +public: + typedef Range value_type; + typedef value_type work_type; + typedef int channel_type; + + enum { generic_type = 0, + channels = 2, + fmt = traits::SafeFmt::fmt + ((channels - 1) << 8) +#ifdef OPENCV_TRAITS_ENABLE_DEPRECATED + ,depth = DataType::depth + ,type = CV_MAKETYPE(depth, channels) +#endif + }; + + typedef Vec vec_type; +}; + +namespace traits { +template<> +struct Depth< Range > { enum { value = Depth::value }; }; +template<> +struct Type< Range > { enum { value = CV_MAKETYPE(Depth::value, 2) }; }; +} // namespace + + +//////////////////////////////// Scalar_ /////////////////////////////// + +/** @brief Template class for a 4-element vector derived from Vec. + +Being derived from Vec\<_Tp, 4\> , Scalar\_ and Scalar can be used just as typical 4-element +vectors. In addition, they can be converted to/from CvScalar . The type Scalar is widely used in +OpenCV to pass pixel values. +*/ +template class Scalar_ : public Vec<_Tp, 4> +{ +public: + //! default constructor + Scalar_(); + Scalar_(_Tp v0, _Tp v1, _Tp v2=0, _Tp v3=0); + Scalar_(_Tp v0); + + Scalar_(const Scalar_& s); + Scalar_(Scalar_&& s) CV_NOEXCEPT; + + Scalar_& operator=(const Scalar_& s); + Scalar_& operator=(Scalar_&& s) CV_NOEXCEPT; + + template + Scalar_(const Vec<_Tp2, cn>& v); + + //! returns a scalar with all elements set to v0 + static Scalar_<_Tp> all(_Tp v0); + + //! conversion to another data type + template operator Scalar_() const; + + //! per-element product + Scalar_<_Tp> mul(const Scalar_<_Tp>& a, double scale=1 ) const; + + //! returns (v0, -v1, -v2, -v3) + Scalar_<_Tp> conj() const; + + //! returns true iff v1 == v2 == v3 == 0 + bool isReal() const; +}; + +typedef Scalar_ Scalar; + +template class DataType< Scalar_<_Tp> > +{ +public: + typedef Scalar_<_Tp> value_type; + typedef Scalar_::work_type> work_type; + typedef _Tp channel_type; + + enum { generic_type = 0, + channels = 4, + fmt = traits::SafeFmt::fmt + ((channels - 1) << 8) +#ifdef OPENCV_TRAITS_ENABLE_DEPRECATED + ,depth = DataType::depth + ,type = CV_MAKETYPE(depth, channels) +#endif + }; + + typedef Vec vec_type; +}; + +namespace traits { +template +struct Depth< Scalar_<_Tp> > { enum { value = Depth<_Tp>::value }; }; +template +struct Type< Scalar_<_Tp> > { enum { value = CV_MAKETYPE(Depth<_Tp>::value, 4) }; }; +} // namespace + + +/////////////////////////////// KeyPoint //////////////////////////////// + +/** @brief Data structure for salient point detectors. + +The class instance stores a keypoint, i.e. a point feature found by one of many available keypoint +detectors, such as Harris corner detector, #FAST, %StarDetector, %SURF, %SIFT etc. + +The keypoint is characterized by the 2D position, scale (proportional to the diameter of the +neighborhood that needs to be taken into account), orientation and some other parameters. The +keypoint neighborhood is then analyzed by another algorithm that builds a descriptor (usually +represented as a feature vector). The keypoints representing the same object in different images +can then be matched using %KDTree or another method. +*/ +class CV_EXPORTS_W_SIMPLE KeyPoint +{ +public: + //! the default constructor + CV_WRAP KeyPoint(); + /** + @param _pt x & y coordinates of the keypoint + @param _size keypoint diameter + @param _angle keypoint orientation + @param _response keypoint detector response on the keypoint (that is, strength of the keypoint) + @param _octave pyramid octave in which the keypoint has been detected + @param _class_id object id + */ + KeyPoint(Point2f _pt, float _size, float _angle=-1, float _response=0, int _octave=0, int _class_id=-1); + /** + @param x x-coordinate of the keypoint + @param y y-coordinate of the keypoint + @param _size keypoint diameter + @param _angle keypoint orientation + @param _response keypoint detector response on the keypoint (that is, strength of the keypoint) + @param _octave pyramid octave in which the keypoint has been detected + @param _class_id object id + */ + CV_WRAP KeyPoint(float x, float y, float _size, float _angle=-1, float _response=0, int _octave=0, int _class_id=-1); + + size_t hash() const; + + /** + This method converts vector of keypoints to vector of points or the reverse, where each keypoint is + assigned the same size and the same orientation. + + @param keypoints Keypoints obtained from any feature detection algorithm like SIFT/SURF/ORB + @param points2f Array of (x,y) coordinates of each keypoint + @param keypointIndexes Array of indexes of keypoints to be converted to points. (Acts like a mask to + convert only specified keypoints) + */ + CV_WRAP static void convert(const std::vector& keypoints, + CV_OUT std::vector& points2f, + const std::vector& keypointIndexes=std::vector()); + /** @overload + @param points2f Array of (x,y) coordinates of each keypoint + @param keypoints Keypoints obtained from any feature detection algorithm like SIFT/SURF/ORB + @param size keypoint diameter + @param response keypoint detector response on the keypoint (that is, strength of the keypoint) + @param octave pyramid octave in which the keypoint has been detected + @param class_id object id + */ + CV_WRAP static void convert(const std::vector& points2f, + CV_OUT std::vector& keypoints, + float size=1, float response=1, int octave=0, int class_id=-1); + + /** + This method computes overlap for pair of keypoints. Overlap is the ratio between area of keypoint + regions' intersection and area of keypoint regions' union (considering keypoint region as circle). + If they don't overlap, we get zero. If they coincide at same location with same size, we get 1. + @param kp1 First keypoint + @param kp2 Second keypoint + */ + CV_WRAP static float overlap(const KeyPoint& kp1, const KeyPoint& kp2); + + CV_PROP_RW Point2f pt; //!< coordinates of the keypoints + CV_PROP_RW float size; //!< diameter of the meaningful keypoint neighborhood + CV_PROP_RW float angle; //!< computed orientation of the keypoint (-1 if not applicable); + //!< it's in [0,360) degrees and measured relative to + //!< image coordinate system, ie in clockwise. + CV_PROP_RW float response; //!< the response by which the most strong keypoints have been selected. Can be used for the further sorting or subsampling + CV_PROP_RW int octave; //!< octave (pyramid layer) from which the keypoint has been extracted + CV_PROP_RW int class_id; //!< object class (if the keypoints need to be clustered by an object they belong to) +}; + +#ifdef OPENCV_TRAITS_ENABLE_DEPRECATED +template<> class DataType +{ +public: + typedef KeyPoint value_type; + typedef float work_type; + typedef float channel_type; + + enum { generic_type = 0, + depth = DataType::depth, + channels = (int)(sizeof(value_type)/sizeof(channel_type)), // 7 + fmt = DataType::fmt + ((channels - 1) << 8), + type = CV_MAKETYPE(depth, channels) + }; + + typedef Vec vec_type; +}; +#endif + + +//////////////////////////////// DMatch ///////////////////////////////// + +/** @brief Class for matching keypoint descriptors + +query descriptor index, train descriptor index, train image index, and distance between +descriptors. +*/ +class CV_EXPORTS_W_SIMPLE DMatch +{ +public: + CV_WRAP DMatch(); + CV_WRAP DMatch(int _queryIdx, int _trainIdx, float _distance); + CV_WRAP DMatch(int _queryIdx, int _trainIdx, int _imgIdx, float _distance); + + CV_PROP_RW int queryIdx; //!< query descriptor index + CV_PROP_RW int trainIdx; //!< train descriptor index + CV_PROP_RW int imgIdx; //!< train image index + + CV_PROP_RW float distance; + + // less is better + bool operator<(const DMatch &m) const; +}; + +#ifdef OPENCV_TRAITS_ENABLE_DEPRECATED +template<> class DataType +{ +public: + typedef DMatch value_type; + typedef int work_type; + typedef int channel_type; + + enum { generic_type = 0, + depth = DataType::depth, + channels = (int)(sizeof(value_type)/sizeof(channel_type)), // 4 + fmt = DataType::fmt + ((channels - 1) << 8), + type = CV_MAKETYPE(depth, channels) + }; + + typedef Vec vec_type; +}; +#endif + + +///////////////////////////// TermCriteria ////////////////////////////// + +/** @brief The class defining termination criteria for iterative algorithms. + +You can initialize it by default constructor and then override any parameters, or the structure may +be fully initialized using the advanced variant of the constructor. +*/ +class CV_EXPORTS TermCriteria +{ +public: + /** + Criteria type, can be one of: COUNT, EPS or COUNT + EPS + */ + enum Type + { + COUNT=1, //!< the maximum number of iterations or elements to compute + MAX_ITER=COUNT, //!< ditto + EPS=2 //!< the desired accuracy or change in parameters at which the iterative algorithm stops + }; + + //! default constructor + TermCriteria(); + /** + @param type The type of termination criteria, one of TermCriteria::Type + @param maxCount The maximum number of iterations or elements to compute. + @param epsilon The desired accuracy or change in parameters at which the iterative algorithm stops. + */ + TermCriteria(int type, int maxCount, double epsilon); + + inline bool isValid() const + { + const bool isCount = (type & COUNT) && maxCount > 0; + const bool isEps = (type & EPS) && !cvIsNaN(epsilon); + return isCount || isEps; + } + + int type; //!< the type of termination criteria: COUNT, EPS or COUNT + EPS + int maxCount; //!< the maximum number of iterations/elements + double epsilon; //!< the desired accuracy +}; + + +//! @} core_basic + +///////////////////////// raster image moments ////////////////////////// + +//! @addtogroup imgproc_shape +//! @{ + +/** @brief struct returned by cv::moments + +The spatial moments \f$\texttt{Moments::m}_{ji}\f$ are computed as: + +\f[\texttt{m} _{ji}= \sum _{x,y} \left ( \texttt{array} (x,y) \cdot x^j \cdot y^i \right )\f] + +The central moments \f$\texttt{Moments::mu}_{ji}\f$ are computed as: + +\f[\texttt{mu} _{ji}= \sum _{x,y} \left ( \texttt{array} (x,y) \cdot (x - \bar{x} )^j \cdot (y - \bar{y} )^i \right )\f] + +where \f$(\bar{x}, \bar{y})\f$ is the mass center: + +\f[\bar{x} = \frac{\texttt{m}_{10}}{\texttt{m}_{00}} , \; \bar{y} = \frac{\texttt{m}_{01}}{\texttt{m}_{00}}\f] + +The normalized central moments \f$\texttt{Moments::nu}_{ij}\f$ are computed as: + +\f[\texttt{nu} _{ji}= \frac{\texttt{mu}_{ji}}{\texttt{m}_{00}^{(i+j)/2+1}} .\f] + +@note +\f$\texttt{mu}_{00}=\texttt{m}_{00}\f$, \f$\texttt{nu}_{00}=1\f$ +\f$\texttt{nu}_{10}=\texttt{mu}_{10}=\texttt{mu}_{01}=\texttt{mu}_{10}=0\f$ , hence the values are not +stored. + +The moments of a contour are defined in the same way but computed using the Green's formula (see +). So, due to a limited raster resolution, the moments +computed for a contour are slightly different from the moments computed for the same rasterized +contour. + +@note +Since the contour moments are computed using Green formula, you may get seemingly odd results for +contours with self-intersections, e.g. a zero area (m00) for butterfly-shaped contours. + */ +class CV_EXPORTS_W_MAP Moments +{ +public: + //! the default constructor + Moments(); + //! the full constructor + Moments(double m00, double m10, double m01, double m20, double m11, + double m02, double m30, double m21, double m12, double m03 ); + ////! the conversion from CvMoments + //Moments( const CvMoments& moments ); + ////! the conversion to CvMoments + //operator CvMoments() const; + + //! @name spatial moments + //! @{ + CV_PROP_RW double m00, m10, m01, m20, m11, m02, m30, m21, m12, m03; + //! @} + + //! @name central moments + //! @{ + CV_PROP_RW double mu20, mu11, mu02, mu30, mu21, mu12, mu03; + //! @} + + //! @name central normalized moments + //! @{ + CV_PROP_RW double nu20, nu11, nu02, nu30, nu21, nu12, nu03; + //! @} +}; + +template<> class DataType +{ +public: + typedef Moments value_type; + typedef double work_type; + typedef double channel_type; + + enum { generic_type = 0, + channels = (int)(sizeof(value_type)/sizeof(channel_type)), // 24 + fmt = DataType::fmt + ((channels - 1) << 8) +#ifdef OPENCV_TRAITS_ENABLE_DEPRECATED + ,depth = DataType::depth + ,type = CV_MAKETYPE(depth, channels) +#endif + }; + + typedef Vec vec_type; +}; + +namespace traits { +template<> +struct Depth< Moments > { enum { value = Depth::value }; }; +template<> +struct Type< Moments > { enum { value = CV_MAKETYPE(Depth::value, (int)(sizeof(Moments)/sizeof(double))) }; }; +} // namespace + +//! @} imgproc_shape + +//! @cond IGNORED + +///////////////////////////////////////////////////////////////////////// +///////////////////////////// Implementation //////////////////////////// +///////////////////////////////////////////////////////////////////////// + +//////////////////////////////// Complex //////////////////////////////// + +template inline +Complex<_Tp>::Complex() + : re(0), im(0) {} + +template inline +Complex<_Tp>::Complex( _Tp _re, _Tp _im ) + : re(_re), im(_im) {} + +template template inline +Complex<_Tp>::operator Complex() const +{ + return Complex(saturate_cast(re), saturate_cast(im)); +} + +template inline +Complex<_Tp> Complex<_Tp>::conj() const +{ + return Complex<_Tp>(re, -im); +} + + +template static inline +bool operator == (const Complex<_Tp>& a, const Complex<_Tp>& b) +{ + return a.re == b.re && a.im == b.im; +} + +template static inline +bool operator != (const Complex<_Tp>& a, const Complex<_Tp>& b) +{ + return a.re != b.re || a.im != b.im; +} + +template static inline +Complex<_Tp> operator + (const Complex<_Tp>& a, const Complex<_Tp>& b) +{ + return Complex<_Tp>( a.re + b.re, a.im + b.im ); +} + +template static inline +Complex<_Tp>& operator += (Complex<_Tp>& a, const Complex<_Tp>& b) +{ + a.re += b.re; a.im += b.im; + return a; +} + +template static inline +Complex<_Tp> operator - (const Complex<_Tp>& a, const Complex<_Tp>& b) +{ + return Complex<_Tp>( a.re - b.re, a.im - b.im ); +} + +template static inline +Complex<_Tp>& operator -= (Complex<_Tp>& a, const Complex<_Tp>& b) +{ + a.re -= b.re; a.im -= b.im; + return a; +} + +template static inline +Complex<_Tp> operator - (const Complex<_Tp>& a) +{ + return Complex<_Tp>(-a.re, -a.im); +} + +template static inline +Complex<_Tp> operator * (const Complex<_Tp>& a, const Complex<_Tp>& b) +{ + return Complex<_Tp>( a.re*b.re - a.im*b.im, a.re*b.im + a.im*b.re ); +} + +template static inline +Complex<_Tp> operator * (const Complex<_Tp>& a, _Tp b) +{ + return Complex<_Tp>( a.re*b, a.im*b ); +} + +template static inline +Complex<_Tp> operator * (_Tp b, const Complex<_Tp>& a) +{ + return Complex<_Tp>( a.re*b, a.im*b ); +} + +template static inline +Complex<_Tp> operator + (const Complex<_Tp>& a, _Tp b) +{ + return Complex<_Tp>( a.re + b, a.im ); +} + +template static inline +Complex<_Tp> operator - (const Complex<_Tp>& a, _Tp b) +{ return Complex<_Tp>( a.re - b, a.im ); } + +template static inline +Complex<_Tp> operator + (_Tp b, const Complex<_Tp>& a) +{ + return Complex<_Tp>( a.re + b, a.im ); +} + +template static inline +Complex<_Tp> operator - (_Tp b, const Complex<_Tp>& a) +{ + return Complex<_Tp>( b - a.re, -a.im ); +} + +template static inline +Complex<_Tp>& operator += (Complex<_Tp>& a, _Tp b) +{ + a.re += b; return a; +} + +template static inline +Complex<_Tp>& operator -= (Complex<_Tp>& a, _Tp b) +{ + a.re -= b; return a; +} + +template static inline +Complex<_Tp>& operator *= (Complex<_Tp>& a, _Tp b) +{ + a.re *= b; a.im *= b; return a; +} + +template static inline +double abs(const Complex<_Tp>& a) +{ + return std::sqrt( (double)a.re*a.re + (double)a.im*a.im); +} + +template static inline +Complex<_Tp> operator / (const Complex<_Tp>& a, const Complex<_Tp>& b) +{ + double t = 1./((double)b.re*b.re + (double)b.im*b.im); + return Complex<_Tp>( (_Tp)((a.re*b.re + a.im*b.im)*t), + (_Tp)((-a.re*b.im + a.im*b.re)*t) ); +} + +template static inline +Complex<_Tp>& operator /= (Complex<_Tp>& a, const Complex<_Tp>& b) +{ + a = a / b; + return a; +} + +template static inline +Complex<_Tp> operator / (const Complex<_Tp>& a, _Tp b) +{ + _Tp t = (_Tp)1/b; + return Complex<_Tp>( a.re*t, a.im*t ); +} + +template static inline +Complex<_Tp> operator / (_Tp b, const Complex<_Tp>& a) +{ + return Complex<_Tp>(b)/a; +} + +template static inline +Complex<_Tp> operator /= (const Complex<_Tp>& a, _Tp b) +{ + _Tp t = (_Tp)1/b; + a.re *= t; a.im *= t; return a; +} + + + +//////////////////////////////// 2D Point /////////////////////////////// + +template inline +Point_<_Tp>::Point_() + : x(0), y(0) {} + +template inline +Point_<_Tp>::Point_(_Tp _x, _Tp _y) + : x(_x), y(_y) {} + +template inline +Point_<_Tp>::Point_(const Point_& pt) + : x(pt.x), y(pt.y) {} + +template inline +Point_<_Tp>::Point_(Point_&& pt) CV_NOEXCEPT + : x(std::move(pt.x)), y(std::move(pt.y)) {} + +template inline +Point_<_Tp>::Point_(const Size_<_Tp>& sz) + : x(sz.width), y(sz.height) {} + +template inline +Point_<_Tp>::Point_(const Vec<_Tp,2>& v) + : x(v[0]), y(v[1]) {} + +template inline +Point_<_Tp>& Point_<_Tp>::operator = (const Point_& pt) +{ + x = pt.x; y = pt.y; + return *this; +} + +template inline +Point_<_Tp>& Point_<_Tp>::operator = (Point_&& pt) CV_NOEXCEPT +{ + x = std::move(pt.x); y = std::move(pt.y); + return *this; +} + +template template inline +Point_<_Tp>::operator Point_<_Tp2>() const +{ + return Point_<_Tp2>(saturate_cast<_Tp2>(x), saturate_cast<_Tp2>(y)); +} + +template inline +Point_<_Tp>::operator Vec<_Tp, 2>() const +{ + return Vec<_Tp, 2>(x, y); +} + +template inline +_Tp Point_<_Tp>::dot(const Point_& pt) const +{ + return saturate_cast<_Tp>(x*pt.x + y*pt.y); +} + +template inline +double Point_<_Tp>::ddot(const Point_& pt) const +{ + return (double)x*(double)(pt.x) + (double)y*(double)(pt.y); +} + +template inline +double Point_<_Tp>::cross(const Point_& pt) const +{ + return (double)x*pt.y - (double)y*pt.x; +} + +template inline bool +Point_<_Tp>::inside( const Rect_<_Tp>& r ) const +{ + return r.contains(*this); +} + + +template static inline +Point_<_Tp>& operator += (Point_<_Tp>& a, const Point_<_Tp>& b) +{ + a.x += b.x; + a.y += b.y; + return a; +} + +template static inline +Point_<_Tp>& operator -= (Point_<_Tp>& a, const Point_<_Tp>& b) +{ + a.x -= b.x; + a.y -= b.y; + return a; +} + +template static inline +Point_<_Tp>& operator *= (Point_<_Tp>& a, int b) +{ + a.x = saturate_cast<_Tp>(a.x * b); + a.y = saturate_cast<_Tp>(a.y * b); + return a; +} + +template static inline +Point_<_Tp>& operator *= (Point_<_Tp>& a, float b) +{ + a.x = saturate_cast<_Tp>(a.x * b); + a.y = saturate_cast<_Tp>(a.y * b); + return a; +} + +template static inline +Point_<_Tp>& operator *= (Point_<_Tp>& a, double b) +{ + a.x = saturate_cast<_Tp>(a.x * b); + a.y = saturate_cast<_Tp>(a.y * b); + return a; +} + +template static inline +Point_<_Tp>& operator /= (Point_<_Tp>& a, int b) +{ + a.x = saturate_cast<_Tp>(a.x / b); + a.y = saturate_cast<_Tp>(a.y / b); + return a; +} + +template static inline +Point_<_Tp>& operator /= (Point_<_Tp>& a, float b) +{ + a.x = saturate_cast<_Tp>(a.x / b); + a.y = saturate_cast<_Tp>(a.y / b); + return a; +} + +template static inline +Point_<_Tp>& operator /= (Point_<_Tp>& a, double b) +{ + a.x = saturate_cast<_Tp>(a.x / b); + a.y = saturate_cast<_Tp>(a.y / b); + return a; +} + +template static inline +double norm(const Point_<_Tp>& pt) +{ + return std::sqrt((double)pt.x*pt.x + (double)pt.y*pt.y); +} + +template static inline +bool operator == (const Point_<_Tp>& a, const Point_<_Tp>& b) +{ + return a.x == b.x && a.y == b.y; +} + +template static inline +bool operator != (const Point_<_Tp>& a, const Point_<_Tp>& b) +{ + return a.x != b.x || a.y != b.y; +} + +template static inline +Point_<_Tp> operator + (const Point_<_Tp>& a, const Point_<_Tp>& b) +{ + return Point_<_Tp>( saturate_cast<_Tp>(a.x + b.x), saturate_cast<_Tp>(a.y + b.y) ); +} + +template static inline +Point_<_Tp> operator - (const Point_<_Tp>& a, const Point_<_Tp>& b) +{ + return Point_<_Tp>( saturate_cast<_Tp>(a.x - b.x), saturate_cast<_Tp>(a.y - b.y) ); +} + +template static inline +Point_<_Tp> operator - (const Point_<_Tp>& a) +{ + return Point_<_Tp>( saturate_cast<_Tp>(-a.x), saturate_cast<_Tp>(-a.y) ); +} + +template static inline +Point_<_Tp> operator * (const Point_<_Tp>& a, int b) +{ + return Point_<_Tp>( saturate_cast<_Tp>(a.x*b), saturate_cast<_Tp>(a.y*b) ); +} + +template static inline +Point_<_Tp> operator * (int a, const Point_<_Tp>& b) +{ + return Point_<_Tp>( saturate_cast<_Tp>(b.x*a), saturate_cast<_Tp>(b.y*a) ); +} + +template static inline +Point_<_Tp> operator * (const Point_<_Tp>& a, float b) +{ + return Point_<_Tp>( saturate_cast<_Tp>(a.x*b), saturate_cast<_Tp>(a.y*b) ); +} + +template static inline +Point_<_Tp> operator * (float a, const Point_<_Tp>& b) +{ + return Point_<_Tp>( saturate_cast<_Tp>(b.x*a), saturate_cast<_Tp>(b.y*a) ); +} + +template static inline +Point_<_Tp> operator * (const Point_<_Tp>& a, double b) +{ + return Point_<_Tp>( saturate_cast<_Tp>(a.x*b), saturate_cast<_Tp>(a.y*b) ); +} + +template static inline +Point_<_Tp> operator * (double a, const Point_<_Tp>& b) +{ + return Point_<_Tp>( saturate_cast<_Tp>(b.x*a), saturate_cast<_Tp>(b.y*a) ); +} + +template static inline +Point_<_Tp> operator * (const Matx<_Tp, 2, 2>& a, const Point_<_Tp>& b) +{ + Matx<_Tp, 2, 1> tmp = a * Vec<_Tp,2>(b.x, b.y); + return Point_<_Tp>(tmp.val[0], tmp.val[1]); +} + +template static inline +Point3_<_Tp> operator * (const Matx<_Tp, 3, 3>& a, const Point_<_Tp>& b) +{ + Matx<_Tp, 3, 1> tmp = a * Vec<_Tp,3>(b.x, b.y, 1); + return Point3_<_Tp>(tmp.val[0], tmp.val[1], tmp.val[2]); +} + +template static inline +Point_<_Tp> operator / (const Point_<_Tp>& a, int b) +{ + Point_<_Tp> tmp(a); + tmp /= b; + return tmp; +} + +template static inline +Point_<_Tp> operator / (const Point_<_Tp>& a, float b) +{ + Point_<_Tp> tmp(a); + tmp /= b; + return tmp; +} + +template static inline +Point_<_Tp> operator / (const Point_<_Tp>& a, double b) +{ + Point_<_Tp> tmp(a); + tmp /= b; + return tmp; +} + + +template static inline _AccTp normL2Sqr(const Point_& pt); +template static inline _AccTp normL2Sqr(const Point_& pt); +template static inline _AccTp normL2Sqr(const Point_& pt); +template static inline _AccTp normL2Sqr(const Point_& pt); + +template<> inline int normL2Sqr(const Point_& pt) { return pt.dot(pt); } +template<> inline int64 normL2Sqr(const Point_& pt) { return pt.dot(pt); } +template<> inline float normL2Sqr(const Point_& pt) { return pt.dot(pt); } +template<> inline double normL2Sqr(const Point_& pt) { return pt.dot(pt); } + +template<> inline double normL2Sqr(const Point_& pt) { return pt.ddot(pt); } +template<> inline double normL2Sqr(const Point_& pt) { return pt.ddot(pt); } + + + +//////////////////////////////// 3D Point /////////////////////////////// + +template inline +Point3_<_Tp>::Point3_() + : x(0), y(0), z(0) {} + +template inline +Point3_<_Tp>::Point3_(_Tp _x, _Tp _y, _Tp _z) + : x(_x), y(_y), z(_z) {} + +template inline +Point3_<_Tp>::Point3_(const Point3_& pt) + : x(pt.x), y(pt.y), z(pt.z) {} + +template inline +Point3_<_Tp>::Point3_(Point3_&& pt) CV_NOEXCEPT + : x(std::move(pt.x)), y(std::move(pt.y)), z(std::move(pt.z)) {} + +template inline +Point3_<_Tp>::Point3_(const Point_<_Tp>& pt) + : x(pt.x), y(pt.y), z(_Tp()) {} + +template inline +Point3_<_Tp>::Point3_(const Vec<_Tp, 3>& v) + : x(v[0]), y(v[1]), z(v[2]) {} + +template template inline +Point3_<_Tp>::operator Point3_<_Tp2>() const +{ + return Point3_<_Tp2>(saturate_cast<_Tp2>(x), saturate_cast<_Tp2>(y), saturate_cast<_Tp2>(z)); +} + +template inline +Point3_<_Tp>::operator Vec<_Tp, 3>() const +{ + return Vec<_Tp, 3>(x, y, z); +} + +template inline +Point3_<_Tp>& Point3_<_Tp>::operator = (const Point3_& pt) +{ + x = pt.x; y = pt.y; z = pt.z; + return *this; +} + +template inline +Point3_<_Tp>& Point3_<_Tp>::operator = (Point3_&& pt) CV_NOEXCEPT +{ + x = std::move(pt.x); y = std::move(pt.y); z = std::move(pt.z); + return *this; +} + +template inline +_Tp Point3_<_Tp>::dot(const Point3_& pt) const +{ + return saturate_cast<_Tp>(x*pt.x + y*pt.y + z*pt.z); +} + +template inline +double Point3_<_Tp>::ddot(const Point3_& pt) const +{ + return (double)x*pt.x + (double)y*pt.y + (double)z*pt.z; +} + +template inline +Point3_<_Tp> Point3_<_Tp>::cross(const Point3_<_Tp>& pt) const +{ + return Point3_<_Tp>(y*pt.z - z*pt.y, z*pt.x - x*pt.z, x*pt.y - y*pt.x); +} + + +template static inline +Point3_<_Tp>& operator += (Point3_<_Tp>& a, const Point3_<_Tp>& b) +{ + a.x += b.x; + a.y += b.y; + a.z += b.z; + return a; +} + +template static inline +Point3_<_Tp>& operator -= (Point3_<_Tp>& a, const Point3_<_Tp>& b) +{ + a.x -= b.x; + a.y -= b.y; + a.z -= b.z; + return a; +} + +template static inline +Point3_<_Tp>& operator *= (Point3_<_Tp>& a, int b) +{ + a.x = saturate_cast<_Tp>(a.x * b); + a.y = saturate_cast<_Tp>(a.y * b); + a.z = saturate_cast<_Tp>(a.z * b); + return a; +} + +template static inline +Point3_<_Tp>& operator *= (Point3_<_Tp>& a, float b) +{ + a.x = saturate_cast<_Tp>(a.x * b); + a.y = saturate_cast<_Tp>(a.y * b); + a.z = saturate_cast<_Tp>(a.z * b); + return a; +} + +template static inline +Point3_<_Tp>& operator *= (Point3_<_Tp>& a, double b) +{ + a.x = saturate_cast<_Tp>(a.x * b); + a.y = saturate_cast<_Tp>(a.y * b); + a.z = saturate_cast<_Tp>(a.z * b); + return a; +} + +template static inline +Point3_<_Tp>& operator /= (Point3_<_Tp>& a, int b) +{ + a.x = saturate_cast<_Tp>(a.x / b); + a.y = saturate_cast<_Tp>(a.y / b); + a.z = saturate_cast<_Tp>(a.z / b); + return a; +} + +template static inline +Point3_<_Tp>& operator /= (Point3_<_Tp>& a, float b) +{ + a.x = saturate_cast<_Tp>(a.x / b); + a.y = saturate_cast<_Tp>(a.y / b); + a.z = saturate_cast<_Tp>(a.z / b); + return a; +} + +template static inline +Point3_<_Tp>& operator /= (Point3_<_Tp>& a, double b) +{ + a.x = saturate_cast<_Tp>(a.x / b); + a.y = saturate_cast<_Tp>(a.y / b); + a.z = saturate_cast<_Tp>(a.z / b); + return a; +} + +template static inline +double norm(const Point3_<_Tp>& pt) +{ + return std::sqrt((double)pt.x*pt.x + (double)pt.y*pt.y + (double)pt.z*pt.z); +} + +template static inline +bool operator == (const Point3_<_Tp>& a, const Point3_<_Tp>& b) +{ + return a.x == b.x && a.y == b.y && a.z == b.z; +} + +template static inline +bool operator != (const Point3_<_Tp>& a, const Point3_<_Tp>& b) +{ + return a.x != b.x || a.y != b.y || a.z != b.z; +} + +template static inline +Point3_<_Tp> operator + (const Point3_<_Tp>& a, const Point3_<_Tp>& b) +{ + return Point3_<_Tp>( saturate_cast<_Tp>(a.x + b.x), saturate_cast<_Tp>(a.y + b.y), saturate_cast<_Tp>(a.z + b.z)); +} + +template static inline +Point3_<_Tp> operator - (const Point3_<_Tp>& a, const Point3_<_Tp>& b) +{ + return Point3_<_Tp>( saturate_cast<_Tp>(a.x - b.x), saturate_cast<_Tp>(a.y - b.y), saturate_cast<_Tp>(a.z - b.z)); +} + +template static inline +Point3_<_Tp> operator - (const Point3_<_Tp>& a) +{ + return Point3_<_Tp>( saturate_cast<_Tp>(-a.x), saturate_cast<_Tp>(-a.y), saturate_cast<_Tp>(-a.z) ); +} + +template static inline +Point3_<_Tp> operator * (const Point3_<_Tp>& a, int b) +{ + return Point3_<_Tp>( saturate_cast<_Tp>(a.x*b), saturate_cast<_Tp>(a.y*b), saturate_cast<_Tp>(a.z*b) ); +} + +template static inline +Point3_<_Tp> operator * (int a, const Point3_<_Tp>& b) +{ + return Point3_<_Tp>( saturate_cast<_Tp>(b.x * a), saturate_cast<_Tp>(b.y * a), saturate_cast<_Tp>(b.z * a) ); +} + +template static inline +Point3_<_Tp> operator * (const Point3_<_Tp>& a, float b) +{ + return Point3_<_Tp>( saturate_cast<_Tp>(a.x * b), saturate_cast<_Tp>(a.y * b), saturate_cast<_Tp>(a.z * b) ); +} + +template static inline +Point3_<_Tp> operator * (float a, const Point3_<_Tp>& b) +{ + return Point3_<_Tp>( saturate_cast<_Tp>(b.x * a), saturate_cast<_Tp>(b.y * a), saturate_cast<_Tp>(b.z * a) ); +} + +template static inline +Point3_<_Tp> operator * (const Point3_<_Tp>& a, double b) +{ + return Point3_<_Tp>( saturate_cast<_Tp>(a.x * b), saturate_cast<_Tp>(a.y * b), saturate_cast<_Tp>(a.z * b) ); +} + +template static inline +Point3_<_Tp> operator * (double a, const Point3_<_Tp>& b) +{ + return Point3_<_Tp>( saturate_cast<_Tp>(b.x * a), saturate_cast<_Tp>(b.y * a), saturate_cast<_Tp>(b.z * a) ); +} + +template static inline +Point3_<_Tp> operator * (const Matx<_Tp, 3, 3>& a, const Point3_<_Tp>& b) +{ + Matx<_Tp, 3, 1> tmp = a * Vec<_Tp,3>(b.x, b.y, b.z); + return Point3_<_Tp>(tmp.val[0], tmp.val[1], tmp.val[2]); +} + +template static inline +Matx<_Tp, 4, 1> operator * (const Matx<_Tp, 4, 4>& a, const Point3_<_Tp>& b) +{ + return a * Matx<_Tp, 4, 1>(b.x, b.y, b.z, 1); +} + +template static inline +Point3_<_Tp> operator / (const Point3_<_Tp>& a, int b) +{ + Point3_<_Tp> tmp(a); + tmp /= b; + return tmp; +} + +template static inline +Point3_<_Tp> operator / (const Point3_<_Tp>& a, float b) +{ + Point3_<_Tp> tmp(a); + tmp /= b; + return tmp; +} + +template static inline +Point3_<_Tp> operator / (const Point3_<_Tp>& a, double b) +{ + Point3_<_Tp> tmp(a); + tmp /= b; + return tmp; +} + + + +////////////////////////////////// Size ///////////////////////////////// + +template inline +Size_<_Tp>::Size_() + : width(0), height(0) {} + +template inline +Size_<_Tp>::Size_(_Tp _width, _Tp _height) + : width(_width), height(_height) {} + +template inline +Size_<_Tp>::Size_(const Size_& sz) + : width(sz.width), height(sz.height) {} + +template inline +Size_<_Tp>::Size_(Size_&& sz) CV_NOEXCEPT + : width(std::move(sz.width)), height(std::move(sz.height)) {} + +template inline +Size_<_Tp>::Size_(const Point_<_Tp>& pt) + : width(pt.x), height(pt.y) {} + +template template inline +Size_<_Tp>::operator Size_<_Tp2>() const +{ + return Size_<_Tp2>(saturate_cast<_Tp2>(width), saturate_cast<_Tp2>(height)); +} + +template inline +Size_<_Tp>& Size_<_Tp>::operator = (const Size_<_Tp>& sz) +{ + width = sz.width; height = sz.height; + return *this; +} + +template inline +Size_<_Tp>& Size_<_Tp>::operator = (Size_<_Tp>&& sz) CV_NOEXCEPT +{ + width = std::move(sz.width); height = std::move(sz.height); + return *this; +} + +template inline +_Tp Size_<_Tp>::area() const +{ + const _Tp result = width * height; + CV_DbgAssert(!std::numeric_limits<_Tp>::is_integer + || width == 0 || result / width == height); // make sure the result fits in the return value + return result; +} + +template inline +double Size_<_Tp>::aspectRatio() const +{ + return width / static_cast(height); +} + +template inline +bool Size_<_Tp>::empty() const +{ + return width <= 0 || height <= 0; +} + + +template static inline +Size_<_Tp>& operator *= (Size_<_Tp>& a, _Tp b) +{ + a.width *= b; + a.height *= b; + return a; +} + +template static inline +Size_<_Tp> operator * (const Size_<_Tp>& a, _Tp b) +{ + Size_<_Tp> tmp(a); + tmp *= b; + return tmp; +} + +template static inline +Size_<_Tp>& operator /= (Size_<_Tp>& a, _Tp b) +{ + a.width /= b; + a.height /= b; + return a; +} + +template static inline +Size_<_Tp> operator / (const Size_<_Tp>& a, _Tp b) +{ + Size_<_Tp> tmp(a); + tmp /= b; + return tmp; +} + +template static inline +Size_<_Tp>& operator += (Size_<_Tp>& a, const Size_<_Tp>& b) +{ + a.width += b.width; + a.height += b.height; + return a; +} + +template static inline +Size_<_Tp> operator + (const Size_<_Tp>& a, const Size_<_Tp>& b) +{ + Size_<_Tp> tmp(a); + tmp += b; + return tmp; +} + +template static inline +Size_<_Tp>& operator -= (Size_<_Tp>& a, const Size_<_Tp>& b) +{ + a.width -= b.width; + a.height -= b.height; + return a; +} + +template static inline +Size_<_Tp> operator - (const Size_<_Tp>& a, const Size_<_Tp>& b) +{ + Size_<_Tp> tmp(a); + tmp -= b; + return tmp; +} + +template static inline +bool operator == (const Size_<_Tp>& a, const Size_<_Tp>& b) +{ + return a.width == b.width && a.height == b.height; +} + +template static inline +bool operator != (const Size_<_Tp>& a, const Size_<_Tp>& b) +{ + return !(a == b); +} + + + +////////////////////////////////// Rect ///////////////////////////////// + +template inline +Rect_<_Tp>::Rect_() + : x(0), y(0), width(0), height(0) {} + +template inline +Rect_<_Tp>::Rect_(_Tp _x, _Tp _y, _Tp _width, _Tp _height) + : x(_x), y(_y), width(_width), height(_height) {} + +template inline +Rect_<_Tp>::Rect_(const Rect_<_Tp>& r) + : x(r.x), y(r.y), width(r.width), height(r.height) {} + +template inline +Rect_<_Tp>::Rect_(Rect_<_Tp>&& r) CV_NOEXCEPT + : x(std::move(r.x)), y(std::move(r.y)), width(std::move(r.width)), height(std::move(r.height)) {} + +template inline +Rect_<_Tp>::Rect_(const Point_<_Tp>& org, const Size_<_Tp>& sz) + : x(org.x), y(org.y), width(sz.width), height(sz.height) {} + +template inline +Rect_<_Tp>::Rect_(const Point_<_Tp>& pt1, const Point_<_Tp>& pt2) +{ + x = std::min(pt1.x, pt2.x); + y = std::min(pt1.y, pt2.y); + width = std::max(pt1.x, pt2.x) - x; + height = std::max(pt1.y, pt2.y) - y; +} + +template inline +Rect_<_Tp>& Rect_<_Tp>::operator = ( const Rect_<_Tp>& r ) +{ + x = r.x; + y = r.y; + width = r.width; + height = r.height; + return *this; +} + +template inline +Rect_<_Tp>& Rect_<_Tp>::operator = ( Rect_<_Tp>&& r ) CV_NOEXCEPT +{ + x = std::move(r.x); + y = std::move(r.y); + width = std::move(r.width); + height = std::move(r.height); + return *this; +} + +template inline +Point_<_Tp> Rect_<_Tp>::tl() const +{ + return Point_<_Tp>(x,y); +} + +template inline +Point_<_Tp> Rect_<_Tp>::br() const +{ + return Point_<_Tp>(x + width, y + height); +} + +template inline +Size_<_Tp> Rect_<_Tp>::size() const +{ + return Size_<_Tp>(width, height); +} + +template inline +_Tp Rect_<_Tp>::area() const +{ + const _Tp result = width * height; + CV_DbgAssert(!std::numeric_limits<_Tp>::is_integer + || width == 0 || result / width == height); // make sure the result fits in the return value + return result; +} + +template inline +bool Rect_<_Tp>::empty() const +{ + return width <= 0 || height <= 0; +} + +template template inline +Rect_<_Tp>::operator Rect_<_Tp2>() const +{ + return Rect_<_Tp2>(saturate_cast<_Tp2>(x), saturate_cast<_Tp2>(y), saturate_cast<_Tp2>(width), saturate_cast<_Tp2>(height)); +} + +template inline +bool Rect_<_Tp>::contains(const Point_<_Tp>& pt) const +{ + return x <= pt.x && pt.x < x + width && y <= pt.y && pt.y < y + height; +} + + +template static inline +Rect_<_Tp>& operator += ( Rect_<_Tp>& a, const Point_<_Tp>& b ) +{ + a.x += b.x; + a.y += b.y; + return a; +} + +template static inline +Rect_<_Tp>& operator -= ( Rect_<_Tp>& a, const Point_<_Tp>& b ) +{ + a.x -= b.x; + a.y -= b.y; + return a; +} + +template static inline +Rect_<_Tp>& operator += ( Rect_<_Tp>& a, const Size_<_Tp>& b ) +{ + a.width += b.width; + a.height += b.height; + return a; +} + +template static inline +Rect_<_Tp>& operator -= ( Rect_<_Tp>& a, const Size_<_Tp>& b ) +{ + const _Tp width = a.width - b.width; + const _Tp height = a.height - b.height; + CV_DbgAssert(width >= 0 && height >= 0); + a.width = width; + a.height = height; + return a; +} + +template static inline +Rect_<_Tp>& operator &= ( Rect_<_Tp>& a, const Rect_<_Tp>& b ) +{ + _Tp x1 = std::max(a.x, b.x); + _Tp y1 = std::max(a.y, b.y); + a.width = std::min(a.x + a.width, b.x + b.width) - x1; + a.height = std::min(a.y + a.height, b.y + b.height) - y1; + a.x = x1; + a.y = y1; + if( a.width <= 0 || a.height <= 0 ) + a = Rect(); + return a; +} + +template static inline +Rect_<_Tp>& operator |= ( Rect_<_Tp>& a, const Rect_<_Tp>& b ) +{ + if (a.empty()) { + a = b; + } + else if (!b.empty()) { + _Tp x1 = std::min(a.x, b.x); + _Tp y1 = std::min(a.y, b.y); + a.width = std::max(a.x + a.width, b.x + b.width) - x1; + a.height = std::max(a.y + a.height, b.y + b.height) - y1; + a.x = x1; + a.y = y1; + } + return a; +} + +template static inline +bool operator == (const Rect_<_Tp>& a, const Rect_<_Tp>& b) +{ + return a.x == b.x && a.y == b.y && a.width == b.width && a.height == b.height; +} + +template static inline +bool operator != (const Rect_<_Tp>& a, const Rect_<_Tp>& b) +{ + return a.x != b.x || a.y != b.y || a.width != b.width || a.height != b.height; +} + +template static inline +Rect_<_Tp> operator + (const Rect_<_Tp>& a, const Point_<_Tp>& b) +{ + return Rect_<_Tp>( a.x + b.x, a.y + b.y, a.width, a.height ); +} + +template static inline +Rect_<_Tp> operator - (const Rect_<_Tp>& a, const Point_<_Tp>& b) +{ + return Rect_<_Tp>( a.x - b.x, a.y - b.y, a.width, a.height ); +} + +template static inline +Rect_<_Tp> operator + (const Rect_<_Tp>& a, const Size_<_Tp>& b) +{ + return Rect_<_Tp>( a.x, a.y, a.width + b.width, a.height + b.height ); +} + +template static inline +Rect_<_Tp> operator - (const Rect_<_Tp>& a, const Size_<_Tp>& b) +{ + const _Tp width = a.width - b.width; + const _Tp height = a.height - b.height; + CV_DbgAssert(width >= 0 && height >= 0); + return Rect_<_Tp>( a.x, a.y, width, height ); +} + +template static inline +Rect_<_Tp> operator & (const Rect_<_Tp>& a, const Rect_<_Tp>& b) +{ + Rect_<_Tp> c = a; + return c &= b; +} + +template static inline +Rect_<_Tp> operator | (const Rect_<_Tp>& a, const Rect_<_Tp>& b) +{ + Rect_<_Tp> c = a; + return c |= b; +} + +/** + * @brief measure dissimilarity between two sample sets + * + * computes the complement of the Jaccard Index as described in . + * For rectangles this reduces to computing the intersection over the union. + */ +template static inline +double jaccardDistance(const Rect_<_Tp>& a, const Rect_<_Tp>& b) { + _Tp Aa = a.area(); + _Tp Ab = b.area(); + + if ((Aa + Ab) <= std::numeric_limits<_Tp>::epsilon()) { + // jaccard_index = 1 -> distance = 0 + return 0.0; + } + + double Aab = (a & b).area(); + // distance = 1 - jaccard_index + return 1.0 - Aab / (Aa + Ab - Aab); +} + +////////////////////////////// RotatedRect ////////////////////////////// + +inline +RotatedRect::RotatedRect() + : center(), size(), angle(0) {} + +inline +RotatedRect::RotatedRect(const Point2f& _center, const Size2f& _size, float _angle) + : center(_center), size(_size), angle(_angle) {} + +///////////////////////////////// Range ///////////////////////////////// + +inline +Range::Range() + : start(0), end(0) {} + +inline +Range::Range(int _start, int _end) + : start(_start), end(_end) {} + +inline +int Range::size() const +{ + return end - start; +} + +inline +bool Range::empty() const +{ + return start == end; +} + +inline +Range Range::all() +{ + return Range(INT_MIN, INT_MAX); +} + + +static inline +bool operator == (const Range& r1, const Range& r2) +{ + return r1.start == r2.start && r1.end == r2.end; +} + +static inline +bool operator != (const Range& r1, const Range& r2) +{ + return !(r1 == r2); +} + +static inline +bool operator !(const Range& r) +{ + return r.start == r.end; +} + +static inline +Range operator & (const Range& r1, const Range& r2) +{ + Range r(std::max(r1.start, r2.start), std::min(r1.end, r2.end)); + r.end = std::max(r.end, r.start); + return r; +} + +static inline +Range& operator &= (Range& r1, const Range& r2) +{ + r1 = r1 & r2; + return r1; +} + +static inline +Range operator + (const Range& r1, int delta) +{ + return Range(r1.start + delta, r1.end + delta); +} + +static inline +Range operator + (int delta, const Range& r1) +{ + return Range(r1.start + delta, r1.end + delta); +} + +static inline +Range operator - (const Range& r1, int delta) +{ + return r1 + (-delta); +} + + + +///////////////////////////////// Scalar //////////////////////////////// + +template inline +Scalar_<_Tp>::Scalar_() +{ + this->val[0] = this->val[1] = this->val[2] = this->val[3] = 0; +} + +template inline +Scalar_<_Tp>::Scalar_(_Tp v0, _Tp v1, _Tp v2, _Tp v3) +{ + this->val[0] = v0; + this->val[1] = v1; + this->val[2] = v2; + this->val[3] = v3; +} + +template inline +Scalar_<_Tp>::Scalar_(const Scalar_<_Tp>& s) : Vec<_Tp, 4>(s) { +} + +template inline +Scalar_<_Tp>::Scalar_(Scalar_<_Tp>&& s) CV_NOEXCEPT { + this->val[0] = std::move(s.val[0]); + this->val[1] = std::move(s.val[1]); + this->val[2] = std::move(s.val[2]); + this->val[3] = std::move(s.val[3]); +} + +template inline +Scalar_<_Tp>& Scalar_<_Tp>::operator=(const Scalar_<_Tp>& s) { + this->val[0] = s.val[0]; + this->val[1] = s.val[1]; + this->val[2] = s.val[2]; + this->val[3] = s.val[3]; + return *this; +} + +template inline +Scalar_<_Tp>& Scalar_<_Tp>::operator=(Scalar_<_Tp>&& s) CV_NOEXCEPT { + this->val[0] = std::move(s.val[0]); + this->val[1] = std::move(s.val[1]); + this->val[2] = std::move(s.val[2]); + this->val[3] = std::move(s.val[3]); + return *this; +} + +template template inline +Scalar_<_Tp>::Scalar_(const Vec<_Tp2, cn>& v) +{ + int i; + for( i = 0; i < (cn < 4 ? cn : 4); i++ ) + this->val[i] = cv::saturate_cast<_Tp>(v.val[i]); + for( ; i < 4; i++ ) + this->val[i] = 0; +} + +template inline +Scalar_<_Tp>::Scalar_(_Tp v0) +{ + this->val[0] = v0; + this->val[1] = this->val[2] = this->val[3] = 0; +} + +template inline +Scalar_<_Tp> Scalar_<_Tp>::all(_Tp v0) +{ + return Scalar_<_Tp>(v0, v0, v0, v0); +} + + +template inline +Scalar_<_Tp> Scalar_<_Tp>::mul(const Scalar_<_Tp>& a, double scale ) const +{ + return Scalar_<_Tp>(saturate_cast<_Tp>(this->val[0] * a.val[0] * scale), + saturate_cast<_Tp>(this->val[1] * a.val[1] * scale), + saturate_cast<_Tp>(this->val[2] * a.val[2] * scale), + saturate_cast<_Tp>(this->val[3] * a.val[3] * scale)); +} + +template inline +Scalar_<_Tp> Scalar_<_Tp>::conj() const +{ + return Scalar_<_Tp>(saturate_cast<_Tp>( this->val[0]), + saturate_cast<_Tp>(-this->val[1]), + saturate_cast<_Tp>(-this->val[2]), + saturate_cast<_Tp>(-this->val[3])); +} + +template inline +bool Scalar_<_Tp>::isReal() const +{ + return this->val[1] == 0 && this->val[2] == 0 && this->val[3] == 0; +} + + +template template inline +Scalar_<_Tp>::operator Scalar_() const +{ + return Scalar_(saturate_cast(this->val[0]), + saturate_cast(this->val[1]), + saturate_cast(this->val[2]), + saturate_cast(this->val[3])); +} + + +template static inline +Scalar_<_Tp>& operator += (Scalar_<_Tp>& a, const Scalar_<_Tp>& b) +{ + a.val[0] += b.val[0]; + a.val[1] += b.val[1]; + a.val[2] += b.val[2]; + a.val[3] += b.val[3]; + return a; +} + +template static inline +Scalar_<_Tp>& operator -= (Scalar_<_Tp>& a, const Scalar_<_Tp>& b) +{ + a.val[0] -= b.val[0]; + a.val[1] -= b.val[1]; + a.val[2] -= b.val[2]; + a.val[3] -= b.val[3]; + return a; +} + +template static inline +Scalar_<_Tp>& operator *= ( Scalar_<_Tp>& a, _Tp v ) +{ + a.val[0] *= v; + a.val[1] *= v; + a.val[2] *= v; + a.val[3] *= v; + return a; +} + +template static inline +bool operator == ( const Scalar_<_Tp>& a, const Scalar_<_Tp>& b ) +{ + return a.val[0] == b.val[0] && a.val[1] == b.val[1] && + a.val[2] == b.val[2] && a.val[3] == b.val[3]; +} + +template static inline +bool operator != ( const Scalar_<_Tp>& a, const Scalar_<_Tp>& b ) +{ + return a.val[0] != b.val[0] || a.val[1] != b.val[1] || + a.val[2] != b.val[2] || a.val[3] != b.val[3]; +} + +template static inline +Scalar_<_Tp> operator + (const Scalar_<_Tp>& a, const Scalar_<_Tp>& b) +{ + return Scalar_<_Tp>(a.val[0] + b.val[0], + a.val[1] + b.val[1], + a.val[2] + b.val[2], + a.val[3] + b.val[3]); +} + +template static inline +Scalar_<_Tp> operator - (const Scalar_<_Tp>& a, const Scalar_<_Tp>& b) +{ + return Scalar_<_Tp>(saturate_cast<_Tp>(a.val[0] - b.val[0]), + saturate_cast<_Tp>(a.val[1] - b.val[1]), + saturate_cast<_Tp>(a.val[2] - b.val[2]), + saturate_cast<_Tp>(a.val[3] - b.val[3])); +} + +template static inline +Scalar_<_Tp> operator * (const Scalar_<_Tp>& a, _Tp alpha) +{ + return Scalar_<_Tp>(a.val[0] * alpha, + a.val[1] * alpha, + a.val[2] * alpha, + a.val[3] * alpha); +} + +template static inline +Scalar_<_Tp> operator * (_Tp alpha, const Scalar_<_Tp>& a) +{ + return a*alpha; +} + +template static inline +Scalar_<_Tp> operator - (const Scalar_<_Tp>& a) +{ + return Scalar_<_Tp>(saturate_cast<_Tp>(-a.val[0]), + saturate_cast<_Tp>(-a.val[1]), + saturate_cast<_Tp>(-a.val[2]), + saturate_cast<_Tp>(-a.val[3])); +} + + +template static inline +Scalar_<_Tp> operator * (const Scalar_<_Tp>& a, const Scalar_<_Tp>& b) +{ + return Scalar_<_Tp>(saturate_cast<_Tp>(a[0]*b[0] - a[1]*b[1] - a[2]*b[2] - a[3]*b[3]), + saturate_cast<_Tp>(a[0]*b[1] + a[1]*b[0] + a[2]*b[3] - a[3]*b[2]), + saturate_cast<_Tp>(a[0]*b[2] - a[1]*b[3] + a[2]*b[0] + a[3]*b[1]), + saturate_cast<_Tp>(a[0]*b[3] + a[1]*b[2] - a[2]*b[1] + a[3]*b[0])); +} + +template static inline +Scalar_<_Tp>& operator *= (Scalar_<_Tp>& a, const Scalar_<_Tp>& b) +{ + a = a * b; + return a; +} + +template static inline +Scalar_<_Tp> operator / (const Scalar_<_Tp>& a, _Tp alpha) +{ + return Scalar_<_Tp>(a.val[0] / alpha, + a.val[1] / alpha, + a.val[2] / alpha, + a.val[3] / alpha); +} + +template static inline +Scalar_ operator / (const Scalar_& a, float alpha) +{ + float s = 1 / alpha; + return Scalar_(a.val[0] * s, a.val[1] * s, a.val[2] * s, a.val[3] * s); +} + +template static inline +Scalar_ operator / (const Scalar_& a, double alpha) +{ + double s = 1 / alpha; + return Scalar_(a.val[0] * s, a.val[1] * s, a.val[2] * s, a.val[3] * s); +} + +template static inline +Scalar_<_Tp>& operator /= (Scalar_<_Tp>& a, _Tp alpha) +{ + a = a / alpha; + return a; +} + +template static inline +Scalar_<_Tp> operator / (_Tp a, const Scalar_<_Tp>& b) +{ + _Tp s = a / (b[0]*b[0] + b[1]*b[1] + b[2]*b[2] + b[3]*b[3]); + return b.conj() * s; +} + +template static inline +Scalar_<_Tp> operator / (const Scalar_<_Tp>& a, const Scalar_<_Tp>& b) +{ + return a * ((_Tp)1 / b); +} + +template static inline +Scalar_<_Tp>& operator /= (Scalar_<_Tp>& a, const Scalar_<_Tp>& b) +{ + a = a / b; + return a; +} + +template static inline +Scalar operator * (const Matx<_Tp, 4, 4>& a, const Scalar& b) +{ + Matx c((Matx)a, b, Matx_MatMulOp()); + return reinterpret_cast(c); +} + +template<> inline +Scalar operator * (const Matx& a, const Scalar& b) +{ + Matx c(a, b, Matx_MatMulOp()); + return reinterpret_cast(c); +} + + + +//////////////////////////////// KeyPoint /////////////////////////////// + +inline +KeyPoint::KeyPoint() + : pt(0,0), size(0), angle(-1), response(0), octave(0), class_id(-1) {} + +inline +KeyPoint::KeyPoint(Point2f _pt, float _size, float _angle, float _response, int _octave, int _class_id) + : pt(_pt), size(_size), angle(_angle), response(_response), octave(_octave), class_id(_class_id) {} + +inline +KeyPoint::KeyPoint(float x, float y, float _size, float _angle, float _response, int _octave, int _class_id) + : pt(x, y), size(_size), angle(_angle), response(_response), octave(_octave), class_id(_class_id) {} + + + +///////////////////////////////// DMatch //////////////////////////////// + +inline +DMatch::DMatch() + : queryIdx(-1), trainIdx(-1), imgIdx(-1), distance(FLT_MAX) {} + +inline +DMatch::DMatch(int _queryIdx, int _trainIdx, float _distance) + : queryIdx(_queryIdx), trainIdx(_trainIdx), imgIdx(-1), distance(_distance) {} + +inline +DMatch::DMatch(int _queryIdx, int _trainIdx, int _imgIdx, float _distance) + : queryIdx(_queryIdx), trainIdx(_trainIdx), imgIdx(_imgIdx), distance(_distance) {} + +inline +bool DMatch::operator < (const DMatch &m) const +{ + return distance < m.distance; +} + + + +////////////////////////////// TermCriteria ///////////////////////////// + +inline +TermCriteria::TermCriteria() + : type(0), maxCount(0), epsilon(0) {} + +inline +TermCriteria::TermCriteria(int _type, int _maxCount, double _epsilon) + : type(_type), maxCount(_maxCount), epsilon(_epsilon) {} + +//! @endcond + +} // cv + +#endif //OPENCV_CORE_TYPES_HPP diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/types_c.h b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/types_c.h new file mode 100644 index 0000000..97aeab3 --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/types_c.h @@ -0,0 +1,2122 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Copyright (C) 2013, OpenCV Foundation, all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef OPENCV_CORE_TYPES_H +#define OPENCV_CORE_TYPES_H + +#ifdef CV__ENABLE_C_API_CTORS // invalid C API ctors (must be removed) +#if defined(_WIN32) && !defined(CV__SKIP_MESSAGE_MALFORMED_C_API_CTORS) +#error "C API ctors don't work on Win32: https://github.com/opencv/opencv/issues/15990" +#endif +#endif + +//#define CV__VALIDATE_UNUNITIALIZED_VARS 1 // C++11 & GCC only + +#ifdef __cplusplus + +#ifdef CV__VALIDATE_UNUNITIALIZED_VARS +#pragma GCC diagnostic ignored "-Wmissing-field-initializers" +#define CV_STRUCT_INITIALIZER {0,} +#else +#if defined(__GNUC__) && __GNUC__ == 4 // GCC 4.x warns on "= {}" initialization, fixed in GCC 5.0 +#pragma GCC diagnostic ignored "-Wmissing-field-initializers" +#endif +#define CV_STRUCT_INITIALIZER {} +#endif + +#else +#define CV_STRUCT_INITIALIZER {0} +#endif + + +#ifdef HAVE_IPL +# ifndef __IPL_H__ +# if defined _WIN32 +# include +# else +# include +# endif +# endif +#elif defined __IPL_H__ +# define HAVE_IPL +#endif + +#include "opencv2/core/cvdef.h" + +#ifndef SKIP_INCLUDES +#include +#include +#include +#include +#endif // SKIP_INCLUDES + +#if defined _WIN32 +# define CV_CDECL __cdecl +# define CV_STDCALL __stdcall +#else +# define CV_CDECL +# define CV_STDCALL +#endif + +#ifndef CV_DEFAULT +# ifdef __cplusplus +# define CV_DEFAULT(val) = val +# else +# define CV_DEFAULT(val) +# endif +#endif + +#ifndef CV_EXTERN_C_FUNCPTR +# ifdef __cplusplus +# define CV_EXTERN_C_FUNCPTR(x) extern "C" { typedef x; } +# else +# define CV_EXTERN_C_FUNCPTR(x) typedef x +# endif +#endif + +#ifndef CVAPI +# define CVAPI(rettype) CV_EXTERN_C CV_EXPORTS rettype CV_CDECL +#endif + +#ifndef CV_IMPL +# define CV_IMPL CV_EXTERN_C +#endif + +#ifdef __cplusplus +# include "opencv2/core.hpp" +#endif + +/** @addtogroup core_c + @{ +*/ + +/** @brief This is the "metatype" used *only* as a function parameter. + +It denotes that the function accepts arrays of multiple types, such as IplImage*, CvMat* or even +CvSeq* sometimes. The particular array type is determined at runtime by analyzing the first 4 +bytes of the header. In C++ interface the role of CvArr is played by InputArray and OutputArray. + */ +typedef void CvArr; + +typedef int CVStatus; + +/** @see cv::Error::Code */ +enum { + CV_StsOk= 0, /**< everything is ok */ + CV_StsBackTrace= -1, /**< pseudo error for back trace */ + CV_StsError= -2, /**< unknown /unspecified error */ + CV_StsInternal= -3, /**< internal error (bad state) */ + CV_StsNoMem= -4, /**< insufficient memory */ + CV_StsBadArg= -5, /**< function arg/param is bad */ + CV_StsBadFunc= -6, /**< unsupported function */ + CV_StsNoConv= -7, /**< iter. didn't converge */ + CV_StsAutoTrace= -8, /**< tracing */ + CV_HeaderIsNull= -9, /**< image header is NULL */ + CV_BadImageSize= -10, /**< image size is invalid */ + CV_BadOffset= -11, /**< offset is invalid */ + CV_BadDataPtr= -12, /**/ + CV_BadStep= -13, /**< image step is wrong, this may happen for a non-continuous matrix */ + CV_BadModelOrChSeq= -14, /**/ + CV_BadNumChannels= -15, /**< bad number of channels, for example, some functions accept only single channel matrices */ + CV_BadNumChannel1U= -16, /**/ + CV_BadDepth= -17, /**< input image depth is not supported by the function */ + CV_BadAlphaChannel= -18, /**/ + CV_BadOrder= -19, /**< number of dimensions is out of range */ + CV_BadOrigin= -20, /**< incorrect input origin */ + CV_BadAlign= -21, /**< incorrect input align */ + CV_BadCallBack= -22, /**/ + CV_BadTileSize= -23, /**/ + CV_BadCOI= -24, /**< input COI is not supported */ + CV_BadROISize= -25, /**< incorrect input roi */ + CV_MaskIsTiled= -26, /**/ + CV_StsNullPtr= -27, /**< null pointer */ + CV_StsVecLengthErr= -28, /**< incorrect vector length */ + CV_StsFilterStructContentErr= -29, /**< incorrect filter structure content */ + CV_StsKernelStructContentErr= -30, /**< incorrect transform kernel content */ + CV_StsFilterOffsetErr= -31, /**< incorrect filter offset value */ + CV_StsBadSize= -201, /**< the input/output structure size is incorrect */ + CV_StsDivByZero= -202, /**< division by zero */ + CV_StsInplaceNotSupported= -203, /**< in-place operation is not supported */ + CV_StsObjectNotFound= -204, /**< request can't be completed */ + CV_StsUnmatchedFormats= -205, /**< formats of input/output arrays differ */ + CV_StsBadFlag= -206, /**< flag is wrong or not supported */ + CV_StsBadPoint= -207, /**< bad CvPoint */ + CV_StsBadMask= -208, /**< bad format of mask (neither 8uC1 nor 8sC1)*/ + CV_StsUnmatchedSizes= -209, /**< sizes of input/output structures do not match */ + CV_StsUnsupportedFormat= -210, /**< the data format/type is not supported by the function*/ + CV_StsOutOfRange= -211, /**< some of parameters are out of range */ + CV_StsParseError= -212, /**< invalid syntax/structure of the parsed file */ + CV_StsNotImplemented= -213, /**< the requested function/feature is not implemented */ + CV_StsBadMemBlock= -214, /**< an allocated block has been corrupted */ + CV_StsAssert= -215, /**< assertion failed */ + CV_GpuNotSupported= -216, /**< no CUDA support */ + CV_GpuApiCallError= -217, /**< GPU API call error */ + CV_OpenGlNotSupported= -218, /**< no OpenGL support */ + CV_OpenGlApiCallError= -219, /**< OpenGL API call error */ + CV_OpenCLApiCallError= -220, /**< OpenCL API call error */ + CV_OpenCLDoubleNotSupported= -221, + CV_OpenCLInitError= -222, /**< OpenCL initialization error */ + CV_OpenCLNoAMDBlasFft= -223 +}; + +/****************************************************************************************\ +* Common macros and inline functions * +\****************************************************************************************/ + +#define CV_SWAP(a,b,t) ((t) = (a), (a) = (b), (b) = (t)) + +/** min & max without jumps */ +#define CV_IMIN(a, b) ((a) ^ (((a)^(b)) & (((a) < (b)) - 1))) + +#define CV_IMAX(a, b) ((a) ^ (((a)^(b)) & (((a) > (b)) - 1))) + +/** absolute value without jumps */ +#ifndef __cplusplus +# define CV_IABS(a) (((a) ^ ((a) < 0 ? -1 : 0)) - ((a) < 0 ? -1 : 0)) +#else +# define CV_IABS(a) abs(a) +#endif +#define CV_CMP(a,b) (((a) > (b)) - ((a) < (b))) +#define CV_SIGN(a) CV_CMP((a),0) + +#define cvInvSqrt(value) ((float)(1./sqrt(value))) +#define cvSqrt(value) ((float)sqrt(value)) + + +/*************** Random number generation *******************/ + +typedef uint64 CvRNG; + +#define CV_RNG_COEFF 4164903690U + +/** @brief Initializes a random number generator state. + +The function initializes a random number generator and returns the state. The pointer to the state +can be then passed to the cvRandInt, cvRandReal and cvRandArr functions. In the current +implementation a multiply-with-carry generator is used. +@param seed 64-bit value used to initiate a random sequence +@sa the C++ class RNG replaced CvRNG. + */ +CV_INLINE CvRNG cvRNG( int64 seed CV_DEFAULT(-1)) +{ + CvRNG rng = seed ? (uint64)seed : (uint64)(int64)-1; + return rng; +} + +/** @brief Returns a 32-bit unsigned integer and updates RNG. + +The function returns a uniformly-distributed random 32-bit unsigned integer and updates the RNG +state. It is similar to the rand() function from the C runtime library, except that OpenCV functions +always generates a 32-bit random number, regardless of the platform. +@param rng CvRNG state initialized by cvRNG. + */ +CV_INLINE unsigned cvRandInt( CvRNG* rng ) +{ + uint64 temp = *rng; + temp = (uint64)(unsigned)temp*CV_RNG_COEFF + (temp >> 32); + *rng = temp; + return (unsigned)temp; +} + +/** @brief Returns a floating-point random number and updates RNG. + +The function returns a uniformly-distributed random floating-point number between 0 and 1 (1 is not +included). +@param rng RNG state initialized by cvRNG + */ +CV_INLINE double cvRandReal( CvRNG* rng ) +{ + return cvRandInt(rng)*2.3283064365386962890625e-10 /* 2^-32 */; +} + +/****************************************************************************************\ +* Image type (IplImage) * +\****************************************************************************************/ + +#ifndef HAVE_IPL + +/* + * The following definitions (until #endif) + * is an extract from IPL headers. + * Copyright (c) 1995 Intel Corporation. + */ +#define IPL_DEPTH_SIGN 0x80000000 + +#define IPL_DEPTH_1U 1 +#define IPL_DEPTH_8U 8 +#define IPL_DEPTH_16U 16 +#define IPL_DEPTH_32F 32 + +#define IPL_DEPTH_8S (IPL_DEPTH_SIGN| 8) +#define IPL_DEPTH_16S (IPL_DEPTH_SIGN|16) +#define IPL_DEPTH_32S (IPL_DEPTH_SIGN|32) + +#define IPL_DATA_ORDER_PIXEL 0 +#define IPL_DATA_ORDER_PLANE 1 + +#define IPL_ORIGIN_TL 0 +#define IPL_ORIGIN_BL 1 + +#define IPL_ALIGN_4BYTES 4 +#define IPL_ALIGN_8BYTES 8 +#define IPL_ALIGN_16BYTES 16 +#define IPL_ALIGN_32BYTES 32 + +#define IPL_ALIGN_DWORD IPL_ALIGN_4BYTES +#define IPL_ALIGN_QWORD IPL_ALIGN_8BYTES + +#define IPL_BORDER_CONSTANT 0 +#define IPL_BORDER_REPLICATE 1 +#define IPL_BORDER_REFLECT 2 +#define IPL_BORDER_WRAP 3 + +#ifdef __cplusplus +typedef struct _IplImage IplImage; +CV_EXPORTS _IplImage cvIplImage(const cv::Mat& m); +#endif + +/** The IplImage is taken from the Intel Image Processing Library, in which the format is native. OpenCV +only supports a subset of possible IplImage formats, as outlined in the parameter list above. + +In addition to the above restrictions, OpenCV handles ROIs differently. OpenCV functions require +that the image size or ROI size of all source and destination images match exactly. On the other +hand, the Intel Image Processing Library processes the area of intersection between the source and +destination images (or ROIs), allowing them to vary independently. +*/ +typedef struct +_IplImage +{ + int nSize; /**< sizeof(IplImage) */ + int ID; /**< version (=0)*/ + int nChannels; /**< Most of OpenCV functions support 1,2,3 or 4 channels */ + int alphaChannel; /**< Ignored by OpenCV */ + int depth; /**< Pixel depth in bits: IPL_DEPTH_8U, IPL_DEPTH_8S, IPL_DEPTH_16S, + IPL_DEPTH_32S, IPL_DEPTH_32F and IPL_DEPTH_64F are supported. */ + char colorModel[4]; /**< Ignored by OpenCV */ + char channelSeq[4]; /**< ditto */ + int dataOrder; /**< 0 - interleaved color channels, 1 - separate color channels. + cvCreateImage can only create interleaved images */ + int origin; /**< 0 - top-left origin, + 1 - bottom-left origin (Windows bitmaps style). */ + int align; /**< Alignment of image rows (4 or 8). + OpenCV ignores it and uses widthStep instead. */ + int width; /**< Image width in pixels. */ + int height; /**< Image height in pixels. */ + struct _IplROI *roi; /**< Image ROI. If NULL, the whole image is selected. */ + struct _IplImage *maskROI; /**< Must be NULL. */ + void *imageId; /**< " " */ + struct _IplTileInfo *tileInfo; /**< " " */ + int imageSize; /**< Image data size in bytes + (==image->height*image->widthStep + in case of interleaved data)*/ + char *imageData; /**< Pointer to aligned image data. */ + int widthStep; /**< Size of aligned image row in bytes. */ + int BorderMode[4]; /**< Ignored by OpenCV. */ + int BorderConst[4]; /**< Ditto. */ + char *imageDataOrigin; /**< Pointer to very origin of image data + (not necessarily aligned) - + needed for correct deallocation */ + +#if defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus) + _IplImage() {} + _IplImage(const cv::Mat& m) { *this = cvIplImage(m); } +#endif +} +IplImage; + +CV_INLINE IplImage cvIplImage() +{ +#if !(defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus)) + IplImage self = CV_STRUCT_INITIALIZER; self.nSize = sizeof(IplImage); return self; +#else + return _IplImage(); +#endif +} + +typedef struct _IplTileInfo IplTileInfo; + +typedef struct _IplROI +{ + int coi; /**< 0 - no COI (all channels are selected), 1 - 0th channel is selected ...*/ + int xOffset; + int yOffset; + int width; + int height; +} +IplROI; + +typedef struct _IplConvKernel +{ + int nCols; + int nRows; + int anchorX; + int anchorY; + int *values; + int nShiftR; +} +IplConvKernel; + +typedef struct _IplConvKernelFP +{ + int nCols; + int nRows; + int anchorX; + int anchorY; + float *values; +} +IplConvKernelFP; + +#define IPL_IMAGE_HEADER 1 +#define IPL_IMAGE_DATA 2 +#define IPL_IMAGE_ROI 4 + +#endif/*HAVE_IPL*/ + +/** extra border mode */ +#define IPL_BORDER_REFLECT_101 4 +#define IPL_BORDER_TRANSPARENT 5 + +#define IPL_IMAGE_MAGIC_VAL ((int)sizeof(IplImage)) +#define CV_TYPE_NAME_IMAGE "opencv-image" + +#define CV_IS_IMAGE_HDR(img) \ + ((img) != NULL && ((const IplImage*)(img))->nSize == sizeof(IplImage)) + +#define CV_IS_IMAGE(img) \ + (CV_IS_IMAGE_HDR(img) && ((IplImage*)img)->imageData != NULL) + +/** for storing double-precision + floating point data in IplImage's */ +#define IPL_DEPTH_64F 64 + +/** get reference to pixel at (col,row), + for multi-channel images (col) should be multiplied by number of channels */ +#define CV_IMAGE_ELEM( image, elemtype, row, col ) \ + (((elemtype*)((image)->imageData + (image)->widthStep*(row)))[(col)]) + +/****************************************************************************************\ +* Matrix type (CvMat) * +\****************************************************************************************/ + +#define CV_AUTO_STEP 0x7fffffff +#define CV_WHOLE_ARR cvSlice( 0, 0x3fffffff ) + +#define CV_MAGIC_MASK 0xFFFF0000 +#define CV_MAT_MAGIC_VAL 0x42420000 +#define CV_TYPE_NAME_MAT "opencv-matrix" + +#ifdef __cplusplus +typedef struct CvMat CvMat; +CV_INLINE CvMat cvMat(const cv::Mat& m); +#endif + +/** Matrix elements are stored row by row. Element (i, j) (i - 0-based row index, j - 0-based column +index) of a matrix can be retrieved or modified using CV_MAT_ELEM macro: + + uchar pixval = CV_MAT_ELEM(grayimg, uchar, i, j) + CV_MAT_ELEM(cameraMatrix, float, 0, 2) = image.width*0.5f; + +To access multiple-channel matrices, you can use +CV_MAT_ELEM(matrix, type, i, j\*nchannels + channel_idx). + +@deprecated CvMat is now obsolete; consider using Mat instead. + */ +typedef struct CvMat +{ + int type; + int step; + + /* for internal use only */ + int* refcount; + int hdr_refcount; + + union + { + uchar* ptr; + short* s; + int* i; + float* fl; + double* db; + } data; + +#ifdef __cplusplus + union + { + int rows; + int height; + }; + + union + { + int cols; + int width; + }; +#else + int rows; + int cols; +#endif + +#if defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus) + CvMat() {} + CvMat(const cv::Mat& m) { *this = cvMat(m); } +#endif +} +CvMat; + + +#define CV_IS_MAT_HDR(mat) \ + ((mat) != NULL && \ + (((const CvMat*)(mat))->type & CV_MAGIC_MASK) == CV_MAT_MAGIC_VAL && \ + ((const CvMat*)(mat))->cols > 0 && ((const CvMat*)(mat))->rows > 0) + +#define CV_IS_MAT_HDR_Z(mat) \ + ((mat) != NULL && \ + (((const CvMat*)(mat))->type & CV_MAGIC_MASK) == CV_MAT_MAGIC_VAL && \ + ((const CvMat*)(mat))->cols >= 0 && ((const CvMat*)(mat))->rows >= 0) + +#define CV_IS_MAT(mat) \ + (CV_IS_MAT_HDR(mat) && ((const CvMat*)(mat))->data.ptr != NULL) + +#define CV_IS_MASK_ARR(mat) \ + (((mat)->type & (CV_MAT_TYPE_MASK & ~CV_8SC1)) == 0) + +#define CV_ARE_TYPES_EQ(mat1, mat2) \ + ((((mat1)->type ^ (mat2)->type) & CV_MAT_TYPE_MASK) == 0) + +#define CV_ARE_CNS_EQ(mat1, mat2) \ + ((((mat1)->type ^ (mat2)->type) & CV_MAT_CN_MASK) == 0) + +#define CV_ARE_DEPTHS_EQ(mat1, mat2) \ + ((((mat1)->type ^ (mat2)->type) & CV_MAT_DEPTH_MASK) == 0) + +#define CV_ARE_SIZES_EQ(mat1, mat2) \ + ((mat1)->rows == (mat2)->rows && (mat1)->cols == (mat2)->cols) + +#define CV_IS_MAT_CONST(mat) \ + (((mat)->rows|(mat)->cols) == 1) + +#define IPL2CV_DEPTH(depth) \ + ((((CV_8U)+(CV_16U<<4)+(CV_32F<<8)+(CV_64F<<16)+(CV_8S<<20)+ \ + (CV_16S<<24)+(CV_32S<<28)) >> ((((depth) & 0xF0) >> 2) + \ + (((depth) & IPL_DEPTH_SIGN) ? 20 : 0))) & 15) + +/** Inline constructor. No data is allocated internally!!! + * (Use together with cvCreateData, or use cvCreateMat instead to + * get a matrix with allocated data): + */ +CV_INLINE CvMat cvMat( int rows, int cols, int type, void* data CV_DEFAULT(NULL)) +{ + CvMat m; + + assert( (unsigned)CV_MAT_DEPTH(type) <= CV_64F ); + type = CV_MAT_TYPE(type); + m.type = CV_MAT_MAGIC_VAL | CV_MAT_CONT_FLAG | type; + m.cols = cols; + m.rows = rows; + m.step = m.cols*CV_ELEM_SIZE(type); + m.data.ptr = (uchar*)data; + m.refcount = NULL; + m.hdr_refcount = 0; + + return m; +} + +#ifdef __cplusplus + +CV_INLINE CvMat cvMat(const cv::Mat& m) +{ + CvMat self; + CV_DbgAssert(m.dims <= 2); + self = cvMat(m.rows, m.dims == 1 ? 1 : m.cols, m.type(), m.data); + self.step = (int)m.step[0]; + self.type = (self.type & ~cv::Mat::CONTINUOUS_FLAG) | (m.flags & cv::Mat::CONTINUOUS_FLAG); + return self; +} +CV_INLINE CvMat cvMat() +{ +#if !defined(CV__ENABLE_C_API_CTORS) + CvMat self = CV_STRUCT_INITIALIZER; return self; +#else + return CvMat(); +#endif +} +CV_INLINE CvMat cvMat(const CvMat& m) +{ +#if !defined(CV__ENABLE_C_API_CTORS) + CvMat self = CV_STRUCT_INITIALIZER; memcpy(&self, &m, sizeof(self)); return self; +#else + return CvMat(m); +#endif +} + +#endif // __cplusplus + + +#define CV_MAT_ELEM_PTR_FAST( mat, row, col, pix_size ) \ + (assert( (unsigned)(row) < (unsigned)(mat).rows && \ + (unsigned)(col) < (unsigned)(mat).cols ), \ + (mat).data.ptr + (size_t)(mat).step*(row) + (pix_size)*(col)) + +#define CV_MAT_ELEM_PTR( mat, row, col ) \ + CV_MAT_ELEM_PTR_FAST( mat, row, col, CV_ELEM_SIZE((mat).type) ) + +#define CV_MAT_ELEM( mat, elemtype, row, col ) \ + (*(elemtype*)CV_MAT_ELEM_PTR_FAST( mat, row, col, sizeof(elemtype))) + +/** @brief Returns the particular element of single-channel floating-point matrix. + +The function is a fast replacement for cvGetReal2D in the case of single-channel floating-point +matrices. It is faster because it is inline, it does fewer checks for array type and array element +type, and it checks for the row and column ranges only in debug mode. +@param mat Input matrix +@param row The zero-based index of row +@param col The zero-based index of column + */ +CV_INLINE double cvmGet( const CvMat* mat, int row, int col ) +{ + int type; + + type = CV_MAT_TYPE(mat->type); + assert( (unsigned)row < (unsigned)mat->rows && + (unsigned)col < (unsigned)mat->cols ); + + if( type == CV_32FC1 ) + return ((float*)(void*)(mat->data.ptr + (size_t)mat->step*row))[col]; + else + { + assert( type == CV_64FC1 ); + return ((double*)(void*)(mat->data.ptr + (size_t)mat->step*row))[col]; + } +} + +/** @brief Sets a specific element of a single-channel floating-point matrix. + +The function is a fast replacement for cvSetReal2D in the case of single-channel floating-point +matrices. It is faster because it is inline, it does fewer checks for array type and array element +type, and it checks for the row and column ranges only in debug mode. +@param mat The matrix +@param row The zero-based index of row +@param col The zero-based index of column +@param value The new value of the matrix element + */ +CV_INLINE void cvmSet( CvMat* mat, int row, int col, double value ) +{ + int type; + type = CV_MAT_TYPE(mat->type); + assert( (unsigned)row < (unsigned)mat->rows && + (unsigned)col < (unsigned)mat->cols ); + + if( type == CV_32FC1 ) + ((float*)(void*)(mat->data.ptr + (size_t)mat->step*row))[col] = (float)value; + else + { + assert( type == CV_64FC1 ); + ((double*)(void*)(mat->data.ptr + (size_t)mat->step*row))[col] = value; + } +} + + +CV_INLINE int cvIplDepth( int type ) +{ + int depth = CV_MAT_DEPTH(type); + return CV_ELEM_SIZE1(depth)*8 | (depth == CV_8S || depth == CV_16S || + depth == CV_32S ? IPL_DEPTH_SIGN : 0); +} + + +/****************************************************************************************\ +* Multi-dimensional dense array (CvMatND) * +\****************************************************************************************/ + +#define CV_MATND_MAGIC_VAL 0x42430000 +#define CV_TYPE_NAME_MATND "opencv-nd-matrix" + +#define CV_MAX_DIM 32 + +#ifdef __cplusplus +typedef struct CvMatND CvMatND; +CV_EXPORTS CvMatND cvMatND(const cv::Mat& m); +#endif + +/** + @deprecated consider using cv::Mat instead + */ +typedef struct +CvMatND +{ + int type; + int dims; + + int* refcount; + int hdr_refcount; + + union + { + uchar* ptr; + float* fl; + double* db; + int* i; + short* s; + } data; + + struct + { + int size; + int step; + } + dim[CV_MAX_DIM]; + +#if defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus) + CvMatND() {} + CvMatND(const cv::Mat& m) { *this = cvMatND(m); } +#endif +} +CvMatND; + + +CV_INLINE CvMatND cvMatND() +{ +#if !(defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus)) + CvMatND self = CV_STRUCT_INITIALIZER; return self; +#else + return CvMatND(); +#endif +} + +#define CV_IS_MATND_HDR(mat) \ + ((mat) != NULL && (((const CvMatND*)(mat))->type & CV_MAGIC_MASK) == CV_MATND_MAGIC_VAL) + +#define CV_IS_MATND(mat) \ + (CV_IS_MATND_HDR(mat) && ((const CvMatND*)(mat))->data.ptr != NULL) + + +/****************************************************************************************\ +* Multi-dimensional sparse array (CvSparseMat) * +\****************************************************************************************/ + +#define CV_SPARSE_MAT_MAGIC_VAL 0x42440000 +#define CV_TYPE_NAME_SPARSE_MAT "opencv-sparse-matrix" + +struct CvSet; + +typedef struct CvSparseMat +{ + int type; + int dims; + int* refcount; + int hdr_refcount; + + struct CvSet* heap; + void** hashtable; + int hashsize; + int valoffset; + int idxoffset; + int size[CV_MAX_DIM]; + +#ifdef __cplusplus + CV_EXPORTS void copyToSparseMat(cv::SparseMat& m) const; +#endif +} +CvSparseMat; + +#ifdef __cplusplus +CV_EXPORTS CvSparseMat* cvCreateSparseMat(const cv::SparseMat& m); +#endif + +#define CV_IS_SPARSE_MAT_HDR(mat) \ + ((mat) != NULL && \ + (((const CvSparseMat*)(mat))->type & CV_MAGIC_MASK) == CV_SPARSE_MAT_MAGIC_VAL) + +#define CV_IS_SPARSE_MAT(mat) \ + CV_IS_SPARSE_MAT_HDR(mat) + +/**************** iteration through a sparse array *****************/ + +typedef struct CvSparseNode +{ + unsigned hashval; + struct CvSparseNode* next; +} +CvSparseNode; + +typedef struct CvSparseMatIterator +{ + CvSparseMat* mat; + CvSparseNode* node; + int curidx; +} +CvSparseMatIterator; + +#define CV_NODE_VAL(mat,node) ((void*)((uchar*)(node) + (mat)->valoffset)) +#define CV_NODE_IDX(mat,node) ((int*)((uchar*)(node) + (mat)->idxoffset)) + +/****************************************************************************************\ +* Histogram * +\****************************************************************************************/ + +typedef int CvHistType; + +#define CV_HIST_MAGIC_VAL 0x42450000 +#define CV_HIST_UNIFORM_FLAG (1 << 10) + +/** indicates whether bin ranges are set already or not */ +#define CV_HIST_RANGES_FLAG (1 << 11) + +#define CV_HIST_ARRAY 0 +#define CV_HIST_SPARSE 1 +#define CV_HIST_TREE CV_HIST_SPARSE + +/** should be used as a parameter only, + it turns to CV_HIST_UNIFORM_FLAG of hist->type */ +#define CV_HIST_UNIFORM 1 + +typedef struct CvHistogram +{ + int type; + CvArr* bins; + float thresh[CV_MAX_DIM][2]; /**< For uniform histograms. */ + float** thresh2; /**< For non-uniform histograms. */ + CvMatND mat; /**< Embedded matrix header for array histograms. */ +} +CvHistogram; + +#define CV_IS_HIST( hist ) \ + ((hist) != NULL && \ + (((CvHistogram*)(hist))->type & CV_MAGIC_MASK) == CV_HIST_MAGIC_VAL && \ + (hist)->bins != NULL) + +#define CV_IS_UNIFORM_HIST( hist ) \ + (((hist)->type & CV_HIST_UNIFORM_FLAG) != 0) + +#define CV_IS_SPARSE_HIST( hist ) \ + CV_IS_SPARSE_MAT((hist)->bins) + +#define CV_HIST_HAS_RANGES( hist ) \ + (((hist)->type & CV_HIST_RANGES_FLAG) != 0) + +/****************************************************************************************\ +* Other supplementary data type definitions * +\****************************************************************************************/ + +/*************************************** CvRect *****************************************/ +/** @sa Rect_ */ +typedef struct CvRect +{ + int x; + int y; + int width; + int height; + +#ifdef CV__VALIDATE_UNUNITIALIZED_VARS + CvRect() __attribute__(( warning("Non-initialized variable") )) {}; + template CvRect(const std::initializer_list<_Tp> list) + { + CV_Assert(list.size() == 0 || list.size() == 4); + x = y = width = height = 0; + if (list.size() == 4) + { + x = list.begin()[0]; y = list.begin()[1]; width = list.begin()[2]; height = list.begin()[3]; + } + }; +#elif defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus) + CvRect(int _x = 0, int _y = 0, int w = 0, int h = 0): x(_x), y(_y), width(w), height(h) {} + template + CvRect(const cv::Rect_<_Tp>& r): x(cv::saturate_cast(r.x)), y(cv::saturate_cast(r.y)), width(cv::saturate_cast(r.width)), height(cv::saturate_cast(r.height)) {} +#endif +#ifdef __cplusplus + template + operator cv::Rect_<_Tp>() const { return cv::Rect_<_Tp>((_Tp)x, (_Tp)y, (_Tp)width, (_Tp)height); } +#endif +} +CvRect; + +/** constructs CvRect structure. */ +CV_INLINE CvRect cvRect( int x, int y, int width, int height ) +{ +#if !(defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus)) + CvRect r = {x, y, width, height}; +#else + CvRect r(x, y , width, height); +#endif + return r; +} +#ifdef __cplusplus +CV_INLINE CvRect cvRect(const cv::Rect& rc) { return cvRect(rc.x, rc.y, rc.width, rc.height); } +#endif + +CV_INLINE IplROI cvRectToROI( CvRect rect, int coi ) +{ + IplROI roi; + roi.xOffset = rect.x; + roi.yOffset = rect.y; + roi.width = rect.width; + roi.height = rect.height; + roi.coi = coi; + + return roi; +} + + +CV_INLINE CvRect cvROIToRect( IplROI roi ) +{ + return cvRect( roi.xOffset, roi.yOffset, roi.width, roi.height ); +} + +/*********************************** CvTermCriteria *************************************/ + +#define CV_TERMCRIT_ITER 1 +#define CV_TERMCRIT_NUMBER CV_TERMCRIT_ITER +#define CV_TERMCRIT_EPS 2 + +/** @sa TermCriteria + */ +typedef struct CvTermCriteria +{ + int type; /**< may be combination of + CV_TERMCRIT_ITER + CV_TERMCRIT_EPS */ + int max_iter; + double epsilon; +#if defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus) + CvTermCriteria(int _type = 0, int _iter = 0, double _eps = 0) : type(_type), max_iter(_iter), epsilon(_eps) {} + CvTermCriteria(const cv::TermCriteria& t) : type(t.type), max_iter(t.maxCount), epsilon(t.epsilon) {} +#endif +#ifdef __cplusplus + operator cv::TermCriteria() const { return cv::TermCriteria(type, max_iter, epsilon); } +#endif +} +CvTermCriteria; + +CV_INLINE CvTermCriteria cvTermCriteria( int type, int max_iter, double epsilon ) +{ +#if !(defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus)) + CvTermCriteria t = { type, max_iter, (float)epsilon}; +#else + CvTermCriteria t(type, max_iter, epsilon); +#endif + return t; +} +#ifdef __cplusplus +CV_INLINE CvTermCriteria cvTermCriteria(const cv::TermCriteria& t) { return cvTermCriteria(t.type, t.maxCount, t.epsilon); } +#endif + + +/******************************* CvPoint and variants ***********************************/ + +typedef struct CvPoint +{ + int x; + int y; + +#ifdef CV__VALIDATE_UNUNITIALIZED_VARS + CvPoint() __attribute__(( warning("Non-initialized variable") )) {} + template CvPoint(const std::initializer_list<_Tp> list) + { + CV_Assert(list.size() == 0 || list.size() == 2); + x = y = 0; + if (list.size() == 2) + { + x = list.begin()[0]; y = list.begin()[1]; + } + }; +#elif defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus) + CvPoint(int _x = 0, int _y = 0): x(_x), y(_y) {} + template + CvPoint(const cv::Point_<_Tp>& pt): x((int)pt.x), y((int)pt.y) {} +#endif +#ifdef __cplusplus + template + operator cv::Point_<_Tp>() const { return cv::Point_<_Tp>(cv::saturate_cast<_Tp>(x), cv::saturate_cast<_Tp>(y)); } +#endif +} +CvPoint; + +/** constructs CvPoint structure. */ +CV_INLINE CvPoint cvPoint( int x, int y ) +{ +#if !(defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus)) + CvPoint p = {x, y}; +#else + CvPoint p(x, y); +#endif + return p; +} +#ifdef __cplusplus +CV_INLINE CvPoint cvPoint(const cv::Point& pt) { return cvPoint(pt.x, pt.y); } +#endif + +typedef struct CvPoint2D32f +{ + float x; + float y; + +#ifdef CV__VALIDATE_UNUNITIALIZED_VARS + CvPoint2D32f() __attribute__(( warning("Non-initialized variable") )) {} + template CvPoint2D32f(const std::initializer_list<_Tp> list) + { + CV_Assert(list.size() == 0 || list.size() == 2); + x = y = 0; + if (list.size() == 2) + { + x = list.begin()[0]; y = list.begin()[1]; + } + }; +#elif defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus) + CvPoint2D32f(float _x = 0, float _y = 0): x(_x), y(_y) {} + template + CvPoint2D32f(const cv::Point_<_Tp>& pt): x((float)pt.x), y((float)pt.y) {} +#endif +#ifdef __cplusplus + template + operator cv::Point_<_Tp>() const { return cv::Point_<_Tp>(cv::saturate_cast<_Tp>(x), cv::saturate_cast<_Tp>(y)); } +#endif +} +CvPoint2D32f; + +/** constructs CvPoint2D32f structure. */ +CV_INLINE CvPoint2D32f cvPoint2D32f( double x, double y ) +{ +#if !(defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus)) + CvPoint2D32f p = { (float)x, (float)y }; +#else + CvPoint2D32f p((float)x, (float)y); +#endif + return p; +} + +#ifdef __cplusplus +template +CvPoint2D32f cvPoint2D32f(const cv::Point_<_Tp>& pt) +{ +#if !(defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus)) + CvPoint2D32f p = { (float)pt.x, (float)pt.y }; +#else + CvPoint2D32f p((float)pt.x, (float)pt.y); +#endif + return p; +} +#endif + +/** converts CvPoint to CvPoint2D32f. */ +CV_INLINE CvPoint2D32f cvPointTo32f( CvPoint point ) +{ + return cvPoint2D32f( (float)point.x, (float)point.y ); +} + +/** converts CvPoint2D32f to CvPoint. */ +CV_INLINE CvPoint cvPointFrom32f( CvPoint2D32f point ) +{ +#if !(defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus)) + CvPoint ipt = { cvRound(point.x), cvRound(point.y) }; +#else + CvPoint ipt(cvRound(point.x), cvRound(point.y)); +#endif + return ipt; +} + + +typedef struct CvPoint3D32f +{ + float x; + float y; + float z; + +#ifdef CV__VALIDATE_UNUNITIALIZED_VARS + CvPoint3D32f() __attribute__(( warning("Non-initialized variable") )) {} + template CvPoint3D32f(const std::initializer_list<_Tp> list) + { + CV_Assert(list.size() == 0 || list.size() == 3); + x = y = z = 0; + if (list.size() == 3) + { + x = list.begin()[0]; y = list.begin()[1]; z = list.begin()[2]; + } + }; +#elif defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus) + CvPoint3D32f(float _x = 0, float _y = 0, float _z = 0): x(_x), y(_y), z(_z) {} + template + CvPoint3D32f(const cv::Point3_<_Tp>& pt): x((float)pt.x), y((float)pt.y), z((float)pt.z) {} +#endif +#ifdef __cplusplus + template + operator cv::Point3_<_Tp>() const { return cv::Point3_<_Tp>(cv::saturate_cast<_Tp>(x), cv::saturate_cast<_Tp>(y), cv::saturate_cast<_Tp>(z)); } +#endif +} +CvPoint3D32f; + +/** constructs CvPoint3D32f structure. */ +CV_INLINE CvPoint3D32f cvPoint3D32f( double x, double y, double z ) +{ +#if !(defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus)) + CvPoint3D32f p = { (float)x, (float)y, (float)z }; +#else + CvPoint3D32f p((float)x, (float)y, (float)z); +#endif + return p; +} + +#ifdef __cplusplus +template +CvPoint3D32f cvPoint3D32f(const cv::Point3_<_Tp>& pt) +{ +#if !(defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus)) + CvPoint3D32f p = { (float)pt.x, (float)pt.y, (float)pt.z }; +#else + CvPoint3D32f p((float)pt.x, (float)pt.y, (float)pt.z); +#endif + return p; +} +#endif + + +typedef struct CvPoint2D64f +{ + double x; + double y; +#ifdef CV__VALIDATE_UNUNITIALIZED_VARS + CvPoint2D64f() __attribute__(( warning("Non-initialized variable") )) {} + template CvPoint2D64f(const std::initializer_list<_Tp> list) + { + CV_Assert(list.size() == 0 || list.size() == 2); + x = y = 0; + if (list.size() == 2) + { + x = list.begin()[0]; y = list.begin()[1]; + } + }; +#endif +} +CvPoint2D64f; + +/** constructs CvPoint2D64f structure.*/ +CV_INLINE CvPoint2D64f cvPoint2D64f( double x, double y ) +{ + CvPoint2D64f p = { x, y }; + return p; +} + + +typedef struct CvPoint3D64f +{ + double x; + double y; + double z; +#ifdef CV__VALIDATE_UNUNITIALIZED_VARS + CvPoint3D64f() __attribute__(( warning("Non-initialized variable") )) {} + template CvPoint3D64f(const std::initializer_list<_Tp> list) + { + CV_Assert(list.size() == 0 || list.size() == 3); + x = y = z = 0; + if (list.size() == 3) + { + x = list.begin()[0]; y = list.begin()[1]; z = list.begin()[2]; + } + }; +#endif +} +CvPoint3D64f; + +/** constructs CvPoint3D64f structure. */ +CV_INLINE CvPoint3D64f cvPoint3D64f( double x, double y, double z ) +{ + CvPoint3D64f p = { x, y, z }; + return p; +} + + +/******************************** CvSize's & CvBox **************************************/ + +typedef struct CvSize +{ + int width; + int height; + +#ifdef CV__VALIDATE_UNUNITIALIZED_VARS + CvSize() __attribute__(( warning("Non-initialized variable") )) {} + template CvSize(const std::initializer_list<_Tp> list) + { + CV_Assert(list.size() == 0 || list.size() == 2); + width = 0; height = 0; + if (list.size() == 2) + { + width = list.begin()[0]; height = list.begin()[1]; + } + }; +#elif defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus) + CvSize(int w = 0, int h = 0): width(w), height(h) {} + template + CvSize(const cv::Size_<_Tp>& sz): width(cv::saturate_cast(sz.width)), height(cv::saturate_cast(sz.height)) {} +#endif +#ifdef __cplusplus + template + operator cv::Size_<_Tp>() const { return cv::Size_<_Tp>(cv::saturate_cast<_Tp>(width), cv::saturate_cast<_Tp>(height)); } +#endif +} +CvSize; + +/** constructs CvSize structure. */ +CV_INLINE CvSize cvSize( int width, int height ) +{ +#if !(defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus)) + CvSize s = { width, height }; +#else + CvSize s(width, height); +#endif + return s; +} + +#ifdef __cplusplus +CV_INLINE CvSize cvSize(const cv::Size& sz) +{ +#if !(defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus)) + CvSize s = { sz.width, sz.height }; +#else + CvSize s(sz.width, sz.height); +#endif + return s; +} +#endif + +typedef struct CvSize2D32f +{ + float width; + float height; + +#ifdef CV__VALIDATE_UNUNITIALIZED_VARS + CvSize2D32f() __attribute__(( warning("Non-initialized variable") )) {} + template CvSize2D32f(const std::initializer_list<_Tp> list) + { + CV_Assert(list.size() == 0 || list.size() == 2); + width = 0; height = 0; + if (list.size() == 2) + { + width = list.begin()[0]; height = list.begin()[1]; + } + }; +#elif defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus) + CvSize2D32f(float w = 0, float h = 0): width(w), height(h) {} + template + CvSize2D32f(const cv::Size_<_Tp>& sz): width(cv::saturate_cast(sz.width)), height(cv::saturate_cast(sz.height)) {} +#endif +#ifdef __cplusplus + template + operator cv::Size_<_Tp>() const { return cv::Size_<_Tp>(cv::saturate_cast<_Tp>(width), cv::saturate_cast<_Tp>(height)); } +#endif +} +CvSize2D32f; + +/** constructs CvSize2D32f structure. */ +CV_INLINE CvSize2D32f cvSize2D32f( double width, double height ) +{ +#if !(defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus)) + CvSize2D32f s = { (float)width, (float)height }; +#else + CvSize2D32f s((float)width, (float)height); +#endif + return s; +} +#ifdef __cplusplus +template +CvSize2D32f cvSize2D32f(const cv::Size_<_Tp>& sz) +{ +#if !(defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus)) + CvSize2D32f s = { (float)sz.width, (float)sz.height }; +#else + CvSize2D32f s((float)sz.width, (float)sz.height); +#endif + return s; +} +#endif + +/** @sa RotatedRect + */ +typedef struct CvBox2D +{ + CvPoint2D32f center; /**< Center of the box. */ + CvSize2D32f size; /**< Box width and length. */ + float angle; /**< Angle between the horizontal axis */ + /**< and the first side (i.e. length) in degrees */ + +#if defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus) + CvBox2D(CvPoint2D32f c = CvPoint2D32f(), CvSize2D32f s = CvSize2D32f(), float a = 0) : center(c), size(s), angle(a) {} + CvBox2D(const cv::RotatedRect& rr) : center(rr.center), size(rr.size), angle(rr.angle) {} +#endif +#ifdef __cplusplus + operator cv::RotatedRect() const { return cv::RotatedRect(center, size, angle); } +#endif +} +CvBox2D; + + +#ifdef __cplusplus +CV_INLINE CvBox2D cvBox2D(CvPoint2D32f c = CvPoint2D32f(), CvSize2D32f s = CvSize2D32f(), float a = 0) +{ + CvBox2D self; + self.center = c; + self.size = s; + self.angle = a; + return self; +} +CV_INLINE CvBox2D cvBox2D(const cv::RotatedRect& rr) +{ + CvBox2D self; + self.center = cvPoint2D32f(rr.center); + self.size = cvSize2D32f(rr.size); + self.angle = rr.angle; + return self; +} +#endif + + +/** Line iterator state: */ +typedef struct CvLineIterator +{ + /** Pointer to the current point: */ + uchar* ptr; + + /* Bresenham algorithm state: */ + int err; + int plus_delta; + int minus_delta; + int plus_step; + int minus_step; +} +CvLineIterator; + + + +/************************************* CvSlice ******************************************/ +#define CV_WHOLE_SEQ_END_INDEX 0x3fffffff +#define CV_WHOLE_SEQ cvSlice(0, CV_WHOLE_SEQ_END_INDEX) + +typedef struct CvSlice +{ + int start_index, end_index; + +#ifdef CV__VALIDATE_UNUNITIALIZED_VARS + CvSlice() __attribute__(( warning("Non-initialized variable") )) {} + template CvSlice(const std::initializer_list<_Tp> list) + { + CV_Assert(list.size() == 0 || list.size() == 2); + start_index = end_index = 0; + if (list.size() == 2) + { + start_index = list.begin()[0]; end_index = list.begin()[1]; + } + }; +#endif +#if defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus) && !defined(__CUDACC__) + CvSlice(int start = 0, int end = 0) : start_index(start), end_index(end) {} + CvSlice(const cv::Range& r) { *this = (r.start != INT_MIN && r.end != INT_MAX) ? CvSlice(r.start, r.end) : CvSlice(0, CV_WHOLE_SEQ_END_INDEX); } + operator cv::Range() const { return (start_index == 0 && end_index == CV_WHOLE_SEQ_END_INDEX ) ? cv::Range::all() : cv::Range(start_index, end_index); } +#endif +} +CvSlice; + +CV_INLINE CvSlice cvSlice( int start, int end ) +{ +#if !(defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus) && !defined(__CUDACC__)) + CvSlice slice = { start, end }; +#else + CvSlice slice(start, end); +#endif + return slice; +} + +#if defined(__cplusplus) +CV_INLINE CvSlice cvSlice(const cv::Range& r) +{ + CvSlice slice = (r.start != INT_MIN && r.end != INT_MAX) ? cvSlice(r.start, r.end) : cvSlice(0, CV_WHOLE_SEQ_END_INDEX); + return slice; +} +#endif + + +/************************************* CvScalar *****************************************/ +/** @sa Scalar_ + */ +typedef struct CvScalar +{ + double val[4]; + +#ifdef CV__VALIDATE_UNUNITIALIZED_VARS + CvScalar() __attribute__(( warning("Non-initialized variable") )) {} + CvScalar(const std::initializer_list list) + { + CV_Assert(list.size() == 0 || list.size() == 4); + val[0] = val[1] = val[2] = val[3] = 0; + if (list.size() == 4) + { + val[0] = list.begin()[0]; val[1] = list.begin()[1]; val[2] = list.begin()[2]; val[3] = list.begin()[3]; + } + }; +#elif defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus) + CvScalar() {} + CvScalar(double d0, double d1 = 0, double d2 = 0, double d3 = 0) { val[0] = d0; val[1] = d1; val[2] = d2; val[3] = d3; } + template + CvScalar(const cv::Scalar_<_Tp>& s) { val[0] = s.val[0]; val[1] = s.val[1]; val[2] = s.val[2]; val[3] = s.val[3]; } + template + CvScalar(const cv::Vec<_Tp, cn>& v) + { + int i; + for( i = 0; i < (cn < 4 ? cn : 4); i++ ) val[i] = v.val[i]; + for( ; i < 4; i++ ) val[i] = 0; + } +#endif +#ifdef __cplusplus + template + operator cv::Scalar_<_Tp>() const { return cv::Scalar_<_Tp>(cv::saturate_cast<_Tp>(val[0]), cv::saturate_cast<_Tp>(val[1]), cv::saturate_cast<_Tp>(val[2]), cv::saturate_cast<_Tp>(val[3])); } +#endif +} +CvScalar; + +CV_INLINE CvScalar cvScalar( double val0, double val1 CV_DEFAULT(0), + double val2 CV_DEFAULT(0), double val3 CV_DEFAULT(0)) +{ +#if !(defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus)) + CvScalar scalar = CV_STRUCT_INITIALIZER; +#else + CvScalar scalar; +#endif + scalar.val[0] = val0; scalar.val[1] = val1; + scalar.val[2] = val2; scalar.val[3] = val3; + return scalar; +} + +#ifdef __cplusplus +CV_INLINE CvScalar cvScalar() +{ +#if !(defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus)) + CvScalar scalar = CV_STRUCT_INITIALIZER; +#else + CvScalar scalar; +#endif + scalar.val[0] = scalar.val[1] = scalar.val[2] = scalar.val[3] = 0; + return scalar; +} +CV_INLINE CvScalar cvScalar(const cv::Scalar& s) +{ +#if !(defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus)) + CvScalar scalar = CV_STRUCT_INITIALIZER; +#else + CvScalar scalar; +#endif + scalar.val[0] = s.val[0]; + scalar.val[1] = s.val[1]; + scalar.val[2] = s.val[2]; + scalar.val[3] = s.val[3]; + return scalar; +} +#endif + +CV_INLINE CvScalar cvRealScalar( double val0 ) +{ +#if !(defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus)) + CvScalar scalar = CV_STRUCT_INITIALIZER; +#else + CvScalar scalar; +#endif + scalar.val[0] = val0; + scalar.val[1] = scalar.val[2] = scalar.val[3] = 0; + return scalar; +} + +CV_INLINE CvScalar cvScalarAll( double val0123 ) +{ +#if !(defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus)) + CvScalar scalar = CV_STRUCT_INITIALIZER; +#else + CvScalar scalar; +#endif + scalar.val[0] = val0123; + scalar.val[1] = val0123; + scalar.val[2] = val0123; + scalar.val[3] = val0123; + return scalar; +} + +/****************************************************************************************\ +* Dynamic Data structures * +\****************************************************************************************/ + +/******************************** Memory storage ****************************************/ + +typedef struct CvMemBlock +{ + struct CvMemBlock* prev; + struct CvMemBlock* next; +} +CvMemBlock; + +#define CV_STORAGE_MAGIC_VAL 0x42890000 + +typedef struct CvMemStorage +{ + int signature; + CvMemBlock* bottom; /**< First allocated block. */ + CvMemBlock* top; /**< Current memory block - top of the stack. */ + struct CvMemStorage* parent; /**< We get new blocks from parent as needed. */ + int block_size; /**< Block size. */ + int free_space; /**< Remaining free space in current block. */ +} +CvMemStorage; + +#define CV_IS_STORAGE(storage) \ + ((storage) != NULL && \ + (((CvMemStorage*)(storage))->signature & CV_MAGIC_MASK) == CV_STORAGE_MAGIC_VAL) + + +typedef struct CvMemStoragePos +{ + CvMemBlock* top; + int free_space; +} +CvMemStoragePos; + + +/*********************************** Sequence *******************************************/ + +typedef struct CvSeqBlock +{ + struct CvSeqBlock* prev; /**< Previous sequence block. */ + struct CvSeqBlock* next; /**< Next sequence block. */ + int start_index; /**< Index of the first element in the block + */ + /**< sequence->first->start_index. */ + int count; /**< Number of elements in the block. */ + schar* data; /**< Pointer to the first element of the block. */ +} +CvSeqBlock; + + +#define CV_TREE_NODE_FIELDS(node_type) \ + int flags; /**< Miscellaneous flags. */ \ + int header_size; /**< Size of sequence header. */ \ + struct node_type* h_prev; /**< Previous sequence. */ \ + struct node_type* h_next; /**< Next sequence. */ \ + struct node_type* v_prev; /**< 2nd previous sequence. */ \ + struct node_type* v_next /**< 2nd next sequence. */ + +/** + Read/Write sequence. + Elements can be dynamically inserted to or deleted from the sequence. +*/ +#define CV_SEQUENCE_FIELDS() \ + CV_TREE_NODE_FIELDS(CvSeq); \ + int total; /**< Total number of elements. */ \ + int elem_size; /**< Size of sequence element in bytes. */ \ + schar* block_max; /**< Maximal bound of the last block. */ \ + schar* ptr; /**< Current write pointer. */ \ + int delta_elems; /**< Grow seq this many at a time. */ \ + CvMemStorage* storage; /**< Where the seq is stored. */ \ + CvSeqBlock* free_blocks; /**< Free blocks list. */ \ + CvSeqBlock* first; /**< Pointer to the first sequence block. */ + +typedef struct CvSeq +{ + CV_SEQUENCE_FIELDS() +} +CvSeq; + +#define CV_TYPE_NAME_SEQ "opencv-sequence" +#define CV_TYPE_NAME_SEQ_TREE "opencv-sequence-tree" + +/*************************************** Set ********************************************/ +/** @brief Set + Order is not preserved. There can be gaps between sequence elements. + After the element has been inserted it stays in the same place all the time. + The MSB(most-significant or sign bit) of the first field (flags) is 0 iff the element exists. +*/ +#define CV_SET_ELEM_FIELDS(elem_type) \ + int flags; \ + struct elem_type* next_free; + +typedef struct CvSetElem +{ + CV_SET_ELEM_FIELDS(CvSetElem) +} +CvSetElem; + +#define CV_SET_FIELDS() \ + CV_SEQUENCE_FIELDS() \ + CvSetElem* free_elems; \ + int active_count; + +typedef struct CvSet +{ + CV_SET_FIELDS() +} +CvSet; + + +#define CV_SET_ELEM_IDX_MASK ((1 << 26) - 1) +#define CV_SET_ELEM_FREE_FLAG (1 << (sizeof(int)*8-1)) + +/** Checks whether the element pointed by ptr belongs to a set or not */ +#define CV_IS_SET_ELEM( ptr ) (((CvSetElem*)(ptr))->flags >= 0) + +/************************************* Graph ********************************************/ + +/** @name Graph + +We represent a graph as a set of vertices. Vertices contain their adjacency lists (more exactly, +pointers to first incoming or outcoming edge (or 0 if isolated vertex)). Edges are stored in +another set. There is a singly-linked list of incoming/outcoming edges for each vertex. + +Each edge consists of: + +- Two pointers to the starting and ending vertices (vtx[0] and vtx[1] respectively). + + A graph may be oriented or not. In the latter case, edges between vertex i to vertex j are not +distinguished during search operations. + +- Two pointers to next edges for the starting and ending vertices, where next[0] points to the +next edge in the vtx[0] adjacency list and next[1] points to the next edge in the vtx[1] +adjacency list. + +@see CvGraphEdge, CvGraphVtx, CvGraphVtx2D, CvGraph +@{ +*/ +#define CV_GRAPH_EDGE_FIELDS() \ + int flags; \ + float weight; \ + struct CvGraphEdge* next[2]; \ + struct CvGraphVtx* vtx[2]; + + +#define CV_GRAPH_VERTEX_FIELDS() \ + int flags; \ + struct CvGraphEdge* first; + + +typedef struct CvGraphEdge +{ + CV_GRAPH_EDGE_FIELDS() +} +CvGraphEdge; + +typedef struct CvGraphVtx +{ + CV_GRAPH_VERTEX_FIELDS() +} +CvGraphVtx; + +typedef struct CvGraphVtx2D +{ + CV_GRAPH_VERTEX_FIELDS() + CvPoint2D32f* ptr; +} +CvGraphVtx2D; + +/** + Graph is "derived" from the set (this is set a of vertices) + and includes another set (edges) +*/ +#define CV_GRAPH_FIELDS() \ + CV_SET_FIELDS() \ + CvSet* edges; + +typedef struct CvGraph +{ + CV_GRAPH_FIELDS() +} +CvGraph; + +#define CV_TYPE_NAME_GRAPH "opencv-graph" + +/** @} */ + +/*********************************** Chain/Contour *************************************/ + +typedef struct CvChain +{ + CV_SEQUENCE_FIELDS() + CvPoint origin; +} +CvChain; + +#define CV_CONTOUR_FIELDS() \ + CV_SEQUENCE_FIELDS() \ + CvRect rect; \ + int color; \ + int reserved[3]; + +typedef struct CvContour +{ + CV_CONTOUR_FIELDS() +} +CvContour; + +typedef CvContour CvPoint2DSeq; + +/****************************************************************************************\ +* Sequence types * +\****************************************************************************************/ + +#define CV_SEQ_MAGIC_VAL 0x42990000 + +#define CV_IS_SEQ(seq) \ + ((seq) != NULL && (((CvSeq*)(seq))->flags & CV_MAGIC_MASK) == CV_SEQ_MAGIC_VAL) + +#define CV_SET_MAGIC_VAL 0x42980000 +#define CV_IS_SET(set) \ + ((set) != NULL && (((CvSeq*)(set))->flags & CV_MAGIC_MASK) == CV_SET_MAGIC_VAL) + +#define CV_SEQ_ELTYPE_BITS 12 +#define CV_SEQ_ELTYPE_MASK ((1 << CV_SEQ_ELTYPE_BITS) - 1) + +#define CV_SEQ_ELTYPE_POINT CV_32SC2 /**< (x,y) */ +#define CV_SEQ_ELTYPE_CODE CV_8UC1 /**< freeman code: 0..7 */ +#define CV_SEQ_ELTYPE_GENERIC 0 +#define CV_SEQ_ELTYPE_PTR CV_MAKE_TYPE(CV_8U, 8 /*sizeof(void*)*/) +#define CV_SEQ_ELTYPE_PPOINT CV_SEQ_ELTYPE_PTR /**< &(x,y) */ +#define CV_SEQ_ELTYPE_INDEX CV_32SC1 /**< #(x,y) */ +#define CV_SEQ_ELTYPE_GRAPH_EDGE 0 /**< &next_o, &next_d, &vtx_o, &vtx_d */ +#define CV_SEQ_ELTYPE_GRAPH_VERTEX 0 /**< first_edge, &(x,y) */ +#define CV_SEQ_ELTYPE_TRIAN_ATR 0 /**< vertex of the binary tree */ +#define CV_SEQ_ELTYPE_CONNECTED_COMP 0 /**< connected component */ +#define CV_SEQ_ELTYPE_POINT3D CV_32FC3 /**< (x,y,z) */ + +#define CV_SEQ_KIND_BITS 2 +#define CV_SEQ_KIND_MASK (((1 << CV_SEQ_KIND_BITS) - 1)<flags & CV_SEQ_ELTYPE_MASK) +#define CV_SEQ_KIND( seq ) ((seq)->flags & CV_SEQ_KIND_MASK ) + +/** flag checking */ +#define CV_IS_SEQ_INDEX( seq ) ((CV_SEQ_ELTYPE(seq) == CV_SEQ_ELTYPE_INDEX) && \ + (CV_SEQ_KIND(seq) == CV_SEQ_KIND_GENERIC)) + +#define CV_IS_SEQ_CURVE( seq ) (CV_SEQ_KIND(seq) == CV_SEQ_KIND_CURVE) +#define CV_IS_SEQ_CLOSED( seq ) (((seq)->flags & CV_SEQ_FLAG_CLOSED) != 0) +#define CV_IS_SEQ_CONVEX( seq ) 0 +#define CV_IS_SEQ_HOLE( seq ) (((seq)->flags & CV_SEQ_FLAG_HOLE) != 0) +#define CV_IS_SEQ_SIMPLE( seq ) 1 + +/** type checking macros */ +#define CV_IS_SEQ_POINT_SET( seq ) \ + ((CV_SEQ_ELTYPE(seq) == CV_32SC2 || CV_SEQ_ELTYPE(seq) == CV_32FC2)) + +#define CV_IS_SEQ_POINT_SUBSET( seq ) \ + (CV_IS_SEQ_INDEX( seq ) || CV_SEQ_ELTYPE(seq) == CV_SEQ_ELTYPE_PPOINT) + +#define CV_IS_SEQ_POLYLINE( seq ) \ + (CV_SEQ_KIND(seq) == CV_SEQ_KIND_CURVE && CV_IS_SEQ_POINT_SET(seq)) + +#define CV_IS_SEQ_POLYGON( seq ) \ + (CV_IS_SEQ_POLYLINE(seq) && CV_IS_SEQ_CLOSED(seq)) + +#define CV_IS_SEQ_CHAIN( seq ) \ + (CV_SEQ_KIND(seq) == CV_SEQ_KIND_CURVE && (seq)->elem_size == 1) + +#define CV_IS_SEQ_CONTOUR( seq ) \ + (CV_IS_SEQ_CLOSED(seq) && (CV_IS_SEQ_POLYLINE(seq) || CV_IS_SEQ_CHAIN(seq))) + +#define CV_IS_SEQ_CHAIN_CONTOUR( seq ) \ + (CV_IS_SEQ_CHAIN( seq ) && CV_IS_SEQ_CLOSED( seq )) + +#define CV_IS_SEQ_POLYGON_TREE( seq ) \ + (CV_SEQ_ELTYPE (seq) == CV_SEQ_ELTYPE_TRIAN_ATR && \ + CV_SEQ_KIND( seq ) == CV_SEQ_KIND_BIN_TREE ) + +#define CV_IS_GRAPH( seq ) \ + (CV_IS_SET(seq) && CV_SEQ_KIND((CvSet*)(seq)) == CV_SEQ_KIND_GRAPH) + +#define CV_IS_GRAPH_ORIENTED( seq ) \ + (((seq)->flags & CV_GRAPH_FLAG_ORIENTED) != 0) + +#define CV_IS_SUBDIV2D( seq ) \ + (CV_IS_SET(seq) && CV_SEQ_KIND((CvSet*)(seq)) == CV_SEQ_KIND_SUBDIV2D) + +/****************************************************************************************/ +/* Sequence writer & reader */ +/****************************************************************************************/ + +#define CV_SEQ_WRITER_FIELDS() \ + int header_size; \ + CvSeq* seq; /**< the sequence written */ \ + CvSeqBlock* block; /**< current block */ \ + schar* ptr; /**< pointer to free space */ \ + schar* block_min; /**< pointer to the beginning of block*/\ + schar* block_max; /**< pointer to the end of block */ + +typedef struct CvSeqWriter +{ + CV_SEQ_WRITER_FIELDS() +} +CvSeqWriter; + + +#define CV_SEQ_READER_FIELDS() \ + int header_size; \ + CvSeq* seq; /**< sequence, beign read */ \ + CvSeqBlock* block; /**< current block */ \ + schar* ptr; /**< pointer to element be read next */ \ + schar* block_min; /**< pointer to the beginning of block */\ + schar* block_max; /**< pointer to the end of block */ \ + int delta_index;/**< = seq->first->start_index */ \ + schar* prev_elem; /**< pointer to previous element */ + +typedef struct CvSeqReader +{ + CV_SEQ_READER_FIELDS() +} +CvSeqReader; + +/****************************************************************************************/ +/* Operations on sequences */ +/****************************************************************************************/ + +#define CV_SEQ_ELEM( seq, elem_type, index ) \ +/** assert gives some guarantee that parameter is valid */ \ +( assert(sizeof((seq)->first[0]) == sizeof(CvSeqBlock) && \ + (seq)->elem_size == sizeof(elem_type)), \ + (elem_type*)((seq)->first && (unsigned)index < \ + (unsigned)((seq)->first->count) ? \ + (seq)->first->data + (index) * sizeof(elem_type) : \ + cvGetSeqElem( (CvSeq*)(seq), (index) ))) +#define CV_GET_SEQ_ELEM( elem_type, seq, index ) CV_SEQ_ELEM( (seq), elem_type, (index) ) + +/** Add element to sequence: */ +#define CV_WRITE_SEQ_ELEM_VAR( elem_ptr, writer ) \ +{ \ + if( (writer).ptr >= (writer).block_max ) \ + { \ + cvCreateSeqBlock( &writer); \ + } \ + memcpy((writer).ptr, elem_ptr, (writer).seq->elem_size);\ + (writer).ptr += (writer).seq->elem_size; \ +} + +#define CV_WRITE_SEQ_ELEM( elem, writer ) \ +{ \ + assert( (writer).seq->elem_size == sizeof(elem)); \ + if( (writer).ptr >= (writer).block_max ) \ + { \ + cvCreateSeqBlock( &writer); \ + } \ + assert( (writer).ptr <= (writer).block_max - sizeof(elem));\ + memcpy((writer).ptr, &(elem), sizeof(elem)); \ + (writer).ptr += sizeof(elem); \ +} + + +/** Move reader position forward: */ +#define CV_NEXT_SEQ_ELEM( elem_size, reader ) \ +{ \ + if( ((reader).ptr += (elem_size)) >= (reader).block_max ) \ + { \ + cvChangeSeqBlock( &(reader), 1 ); \ + } \ +} + + +/** Move reader position backward: */ +#define CV_PREV_SEQ_ELEM( elem_size, reader ) \ +{ \ + if( ((reader).ptr -= (elem_size)) < (reader).block_min ) \ + { \ + cvChangeSeqBlock( &(reader), -1 ); \ + } \ +} + +/** Read element and move read position forward: */ +#define CV_READ_SEQ_ELEM( elem, reader ) \ +{ \ + assert( (reader).seq->elem_size == sizeof(elem)); \ + memcpy( &(elem), (reader).ptr, sizeof((elem))); \ + CV_NEXT_SEQ_ELEM( sizeof(elem), reader ) \ +} + +/** Read element and move read position backward: */ +#define CV_REV_READ_SEQ_ELEM( elem, reader ) \ +{ \ + assert( (reader).seq->elem_size == sizeof(elem)); \ + memcpy(&(elem), (reader).ptr, sizeof((elem))); \ + CV_PREV_SEQ_ELEM( sizeof(elem), reader ) \ +} + + +#define CV_READ_CHAIN_POINT( _pt, reader ) \ +{ \ + (_pt) = (reader).pt; \ + if( (reader).ptr ) \ + { \ + CV_READ_SEQ_ELEM( (reader).code, (reader)); \ + assert( ((reader).code & ~7) == 0 ); \ + (reader).pt.x += (reader).deltas[(int)(reader).code][0]; \ + (reader).pt.y += (reader).deltas[(int)(reader).code][1]; \ + } \ +} + +#define CV_CURRENT_POINT( reader ) (*((CvPoint*)((reader).ptr))) +#define CV_PREV_POINT( reader ) (*((CvPoint*)((reader).prev_elem))) + +#define CV_READ_EDGE( pt1, pt2, reader ) \ +{ \ + assert( sizeof(pt1) == sizeof(CvPoint) && \ + sizeof(pt2) == sizeof(CvPoint) && \ + reader.seq->elem_size == sizeof(CvPoint)); \ + (pt1) = CV_PREV_POINT( reader ); \ + (pt2) = CV_CURRENT_POINT( reader ); \ + (reader).prev_elem = (reader).ptr; \ + CV_NEXT_SEQ_ELEM( sizeof(CvPoint), (reader)); \ +} + +/************ Graph macros ************/ + +/** Return next graph edge for given vertex: */ +#define CV_NEXT_GRAPH_EDGE( edge, vertex ) \ + (assert((edge)->vtx[0] == (vertex) || (edge)->vtx[1] == (vertex)), \ + (edge)->next[(edge)->vtx[1] == (vertex)]) + + + +/****************************************************************************************\ +* Data structures for persistence (a.k.a serialization) functionality * +\****************************************************************************************/ + +#if 0 + +/** "black box" file storage */ +typedef struct CvFileStorage CvFileStorage; + +/** Storage flags: */ +#define CV_STORAGE_READ 0 +#define CV_STORAGE_WRITE 1 +#define CV_STORAGE_WRITE_TEXT CV_STORAGE_WRITE +#define CV_STORAGE_WRITE_BINARY CV_STORAGE_WRITE +#define CV_STORAGE_APPEND 2 +#define CV_STORAGE_MEMORY 4 +#define CV_STORAGE_FORMAT_MASK (7<<3) +#define CV_STORAGE_FORMAT_AUTO 0 +#define CV_STORAGE_FORMAT_XML 8 +#define CV_STORAGE_FORMAT_YAML 16 +#define CV_STORAGE_FORMAT_JSON 24 +#define CV_STORAGE_BASE64 64 +#define CV_STORAGE_WRITE_BASE64 (CV_STORAGE_BASE64 | CV_STORAGE_WRITE) + +/** @brief List of attributes. : + +In the current implementation, attributes are used to pass extra parameters when writing user +objects (see cvWrite). XML attributes inside tags are not supported, aside from the object type +specification (type_id attribute). +@see cvAttrList, cvAttrValue + */ +typedef struct CvAttrList +{ + const char** attr; /**< NULL-terminated array of (attribute_name,attribute_value) pairs. */ + struct CvAttrList* next; /**< Pointer to next chunk of the attributes list. */ +} +CvAttrList; + +/** initializes CvAttrList structure */ +CV_INLINE CvAttrList cvAttrList( const char** attr CV_DEFAULT(NULL), + CvAttrList* next CV_DEFAULT(NULL) ) +{ + CvAttrList l; + l.attr = attr; + l.next = next; + + return l; +} + +struct CvTypeInfo; + +#define CV_NODE_NONE 0 +#define CV_NODE_INT 1 +#define CV_NODE_INTEGER CV_NODE_INT +#define CV_NODE_REAL 2 +#define CV_NODE_FLOAT CV_NODE_REAL +#define CV_NODE_STR 3 +#define CV_NODE_STRING CV_NODE_STR +#define CV_NODE_REF 4 /**< not used */ +#define CV_NODE_SEQ 5 +#define CV_NODE_MAP 6 +#define CV_NODE_TYPE_MASK 7 + +#define CV_NODE_TYPE(flags) ((flags) & CV_NODE_TYPE_MASK) + +/** file node flags */ +#define CV_NODE_FLOW 8 /**= CV_NODE_SEQ) +#define CV_NODE_IS_FLOW(flags) (((flags) & CV_NODE_FLOW) != 0) +#define CV_NODE_IS_EMPTY(flags) (((flags) & CV_NODE_EMPTY) != 0) +#define CV_NODE_IS_USER(flags) (((flags) & CV_NODE_USER) != 0) +#define CV_NODE_HAS_NAME(flags) (((flags) & CV_NODE_NAMED) != 0) + +#define CV_NODE_SEQ_SIMPLE 256 +#define CV_NODE_SEQ_IS_SIMPLE(seq) (((seq)->flags & CV_NODE_SEQ_SIMPLE) != 0) + +typedef struct CvString +{ + int len; + char* ptr; +} +CvString; + +/** All the keys (names) of elements in the read file storage + are stored in the hash to speed up the lookup operations: */ +typedef struct CvStringHashNode +{ + unsigned hashval; + CvString str; + struct CvStringHashNode* next; +} +CvStringHashNode; + +typedef struct CvGenericHash CvFileNodeHash; + +/** Basic element of the file storage - scalar or collection: */ +typedef struct CvFileNode +{ + int tag; + struct CvTypeInfo* info; /**< type information + (only for user-defined object, for others it is 0) */ + union + { + double f; /**< scalar floating-point number */ + int i; /**< scalar integer number */ + CvString str; /**< text string */ + CvSeq* seq; /**< sequence (ordered collection of file nodes) */ + CvFileNodeHash* map; /**< map (collection of named file nodes) */ + } data; +} +CvFileNode; + +#ifdef __cplusplus +extern "C" { +#endif +typedef int (CV_CDECL *CvIsInstanceFunc)( const void* struct_ptr ); +typedef void (CV_CDECL *CvReleaseFunc)( void** struct_dblptr ); +typedef void* (CV_CDECL *CvReadFunc)( CvFileStorage* storage, CvFileNode* node ); +typedef void (CV_CDECL *CvWriteFunc)( CvFileStorage* storage, const char* name, + const void* struct_ptr, CvAttrList attributes ); +typedef void* (CV_CDECL *CvCloneFunc)( const void* struct_ptr ); +#ifdef __cplusplus +} +#endif + +/** @brief Type information + +The structure contains information about one of the standard or user-defined types. Instances of the +type may or may not contain a pointer to the corresponding CvTypeInfo structure. In any case, there +is a way to find the type info structure for a given object using the cvTypeOf function. +Alternatively, type info can be found by type name using cvFindType, which is used when an object +is read from file storage. The user can register a new type with cvRegisterType that adds the type +information structure into the beginning of the type list. Thus, it is possible to create +specialized types from generic standard types and override the basic methods. + */ +typedef struct CvTypeInfo +{ + int flags; /**< not used */ + int header_size; /**< sizeof(CvTypeInfo) */ + struct CvTypeInfo* prev; /**< previous registered type in the list */ + struct CvTypeInfo* next; /**< next registered type in the list */ + const char* type_name; /**< type name, written to file storage */ + CvIsInstanceFunc is_instance; /**< checks if the passed object belongs to the type */ + CvReleaseFunc release; /**< releases object (memory etc.) */ + CvReadFunc read; /**< reads object from file storage */ + CvWriteFunc write; /**< writes object to file storage */ + CvCloneFunc clone; /**< creates a copy of the object */ +} +CvTypeInfo; +#endif + +/** @} */ + +#endif /*OPENCV_CORE_TYPES_H*/ + +/* End of file. */ diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/utility.hpp b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/utility.hpp new file mode 100644 index 0000000..c52abbb --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/utility.hpp @@ -0,0 +1,1200 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Copyright (C) 2013, OpenCV Foundation, all rights reserved. +// Copyright (C) 2015, Itseez Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef OPENCV_CORE_UTILITY_H +#define OPENCV_CORE_UTILITY_H + +#ifndef __cplusplus +# error utility.hpp header must be compiled as C++ +#endif + +#if defined(check) +# warning Detected Apple 'check' macro definition, it can cause build conflicts. Please, include this header before any Apple headers. +#endif + +#include "opencv2/core.hpp" +#include + +#include + +#if !defined(_M_CEE) +#include // std::mutex, std::lock_guard +#endif + +namespace cv +{ + +//! @addtogroup core_utils +//! @{ + +/** @brief Automatically Allocated Buffer Class + + The class is used for temporary buffers in functions and methods. + If a temporary buffer is usually small (a few K's of memory), + but its size depends on the parameters, it makes sense to create a small + fixed-size array on stack and use it if it's large enough. If the required buffer size + is larger than the fixed size, another buffer of sufficient size is allocated dynamically + and released after the processing. Therefore, in typical cases, when the buffer size is small, + there is no overhead associated with malloc()/free(). + At the same time, there is no limit on the size of processed data. + + This is what AutoBuffer does. The template takes 2 parameters - type of the buffer elements and + the number of stack-allocated elements. Here is how the class is used: + + \code + void my_func(const cv::Mat& m) + { + cv::AutoBuffer buf(1000); // create automatic buffer containing 1000 floats + + buf.allocate(m.rows); // if m.rows <= 1000, the pre-allocated buffer is used, + // otherwise the buffer of "m.rows" floats will be allocated + // dynamically and deallocated in cv::AutoBuffer destructor + ... + } + \endcode +*/ +#ifdef OPENCV_ENABLE_MEMORY_SANITIZER +template class AutoBuffer +#else +template class AutoBuffer +#endif +{ +public: + typedef _Tp value_type; + + //! the default constructor + AutoBuffer(); + //! constructor taking the real buffer size + explicit AutoBuffer(size_t _size); + + //! the copy constructor + AutoBuffer(const AutoBuffer<_Tp, fixed_size>& buf); + //! the assignment operator + AutoBuffer<_Tp, fixed_size>& operator = (const AutoBuffer<_Tp, fixed_size>& buf); + + //! destructor. calls deallocate() + ~AutoBuffer(); + + //! allocates the new buffer of size _size. if the _size is small enough, stack-allocated buffer is used + void allocate(size_t _size); + //! deallocates the buffer if it was dynamically allocated + void deallocate(); + //! resizes the buffer and preserves the content + void resize(size_t _size); + //! returns the current buffer size + size_t size() const; + //! returns pointer to the real buffer, stack-allocated or heap-allocated + inline _Tp* data() { return ptr; } + //! returns read-only pointer to the real buffer, stack-allocated or heap-allocated + inline const _Tp* data() const { return ptr; } + +#if !defined(OPENCV_DISABLE_DEPRECATED_COMPATIBILITY) // use to .data() calls instead + //! returns pointer to the real buffer, stack-allocated or heap-allocated + operator _Tp* () { return ptr; } + //! returns read-only pointer to the real buffer, stack-allocated or heap-allocated + operator const _Tp* () const { return ptr; } +#else + //! returns a reference to the element at specified location. No bounds checking is performed in Release builds. + inline _Tp& operator[] (size_t i) { CV_DbgCheckLT(i, sz, "out of range"); return ptr[i]; } + //! returns a reference to the element at specified location. No bounds checking is performed in Release builds. + inline const _Tp& operator[] (size_t i) const { CV_DbgCheckLT(i, sz, "out of range"); return ptr[i]; } +#endif + +protected: + //! pointer to the real buffer, can point to buf if the buffer is small enough + _Tp* ptr; + //! size of the real buffer + size_t sz; + //! pre-allocated buffer. At least 1 element to confirm C++ standard requirements + _Tp buf[(fixed_size > 0) ? fixed_size : 1]; +}; + +/** @brief Sets/resets the break-on-error mode. + +When the break-on-error mode is set, the default error handler issues a hardware exception, which +can make debugging more convenient. + +\return the previous state + */ +CV_EXPORTS bool setBreakOnError(bool flag); + +extern "C" typedef int (*ErrorCallback)( int status, const char* func_name, + const char* err_msg, const char* file_name, + int line, void* userdata ); + + +/** @brief Sets the new error handler and the optional user data. + + The function sets the new error handler, called from cv::error(). + + \param errCallback the new error handler. If NULL, the default error handler is used. + \param userdata the optional user data pointer, passed to the callback. + \param prevUserdata the optional output parameter where the previous user data pointer is stored + + \return the previous error handler +*/ +CV_EXPORTS ErrorCallback redirectError( ErrorCallback errCallback, void* userdata=0, void** prevUserdata=0); + +CV_EXPORTS String tempfile( const char* suffix = 0); +CV_EXPORTS void glob(String pattern, std::vector& result, bool recursive = false); + +/** @brief OpenCV will try to set the number of threads for the next parallel region. + +If threads == 0, OpenCV will disable threading optimizations and run all it's functions +sequentially. Passing threads \< 0 will reset threads number to system default. This function must +be called outside of parallel region. + +OpenCV will try to run its functions with specified threads number, but some behaviour differs from +framework: +- `TBB` - User-defined parallel constructions will run with the same threads number, if + another is not specified. If later on user creates his own scheduler, OpenCV will use it. +- `OpenMP` - No special defined behaviour. +- `Concurrency` - If threads == 1, OpenCV will disable threading optimizations and run its + functions sequentially. +- `GCD` - Supports only values \<= 0. +- `C=` - No special defined behaviour. +@param nthreads Number of threads used by OpenCV. +@sa getNumThreads, getThreadNum + */ +CV_EXPORTS_W void setNumThreads(int nthreads); + +/** @brief Returns the number of threads used by OpenCV for parallel regions. + +Always returns 1 if OpenCV is built without threading support. + +The exact meaning of return value depends on the threading framework used by OpenCV library: +- `TBB` - The number of threads, that OpenCV will try to use for parallel regions. If there is + any tbb::thread_scheduler_init in user code conflicting with OpenCV, then function returns + default number of threads used by TBB library. +- `OpenMP` - An upper bound on the number of threads that could be used to form a new team. +- `Concurrency` - The number of threads, that OpenCV will try to use for parallel regions. +- `GCD` - Unsupported; returns the GCD thread pool limit (512) for compatibility. +- `C=` - The number of threads, that OpenCV will try to use for parallel regions, if before + called setNumThreads with threads \> 0, otherwise returns the number of logical CPUs, + available for the process. +@sa setNumThreads, getThreadNum + */ +CV_EXPORTS_W int getNumThreads(); + +/** @brief Returns the index of the currently executed thread within the current parallel region. Always +returns 0 if called outside of parallel region. + +@deprecated Current implementation doesn't corresponding to this documentation. + +The exact meaning of the return value depends on the threading framework used by OpenCV library: +- `TBB` - Unsupported with current 4.1 TBB release. Maybe will be supported in future. +- `OpenMP` - The thread number, within the current team, of the calling thread. +- `Concurrency` - An ID for the virtual processor that the current context is executing on (0 + for master thread and unique number for others, but not necessary 1,2,3,...). +- `GCD` - System calling thread's ID. Never returns 0 inside parallel region. +- `C=` - The index of the current parallel task. +@sa setNumThreads, getNumThreads + */ +CV_EXPORTS_W int getThreadNum(); + +/** @brief Returns full configuration time cmake output. + +Returned value is raw cmake output including version control system revision, compiler version, +compiler flags, enabled modules and third party libraries, etc. Output format depends on target +architecture. + */ +CV_EXPORTS_W const String& getBuildInformation(); + +/** @brief Returns library version string + +For example "3.4.1-dev". + +@sa getMajorVersion, getMinorVersion, getRevisionVersion +*/ +CV_EXPORTS_W String getVersionString(); + +/** @brief Returns major library version */ +CV_EXPORTS_W int getVersionMajor(); + +/** @brief Returns minor library version */ +CV_EXPORTS_W int getVersionMinor(); + +/** @brief Returns revision field of the library version */ +CV_EXPORTS_W int getVersionRevision(); + +/** @brief Returns the number of ticks. + +The function returns the number of ticks after the certain event (for example, when the machine was +turned on). It can be used to initialize RNG or to measure a function execution time by reading the +tick count before and after the function call. +@sa getTickFrequency, TickMeter + */ +CV_EXPORTS_W int64 getTickCount(); + +/** @brief Returns the number of ticks per second. + +The function returns the number of ticks per second. That is, the following code computes the +execution time in seconds: +@code + double t = (double)getTickCount(); + // do something ... + t = ((double)getTickCount() - t)/getTickFrequency(); +@endcode +@sa getTickCount, TickMeter + */ +CV_EXPORTS_W double getTickFrequency(); + +/** @brief a Class to measure passing time. + +The class computes passing time by counting the number of ticks per second. That is, the following code computes the +execution time in seconds: +@snippet snippets/core_various.cpp TickMeter_total + +It is also possible to compute the average time over multiple runs: +@snippet snippets/core_various.cpp TickMeter_average + +@sa getTickCount, getTickFrequency +*/ +class CV_EXPORTS_W TickMeter +{ +public: + //! the default constructor + CV_WRAP TickMeter() + { + reset(); + } + + //! starts counting ticks. + CV_WRAP void start() + { + startTime = cv::getTickCount(); + } + + //! stops counting ticks. + CV_WRAP void stop() + { + int64 time = cv::getTickCount(); + if (startTime == 0) + return; + ++counter; + sumTime += (time - startTime); + startTime = 0; + } + + //! returns counted ticks. + CV_WRAP int64 getTimeTicks() const + { + return sumTime; + } + + //! returns passed time in microseconds. + CV_WRAP double getTimeMicro() const + { + return getTimeMilli()*1e3; + } + + //! returns passed time in milliseconds. + CV_WRAP double getTimeMilli() const + { + return getTimeSec()*1e3; + } + + //! returns passed time in seconds. + CV_WRAP double getTimeSec() const + { + return (double)getTimeTicks() / getTickFrequency(); + } + + //! returns internal counter value. + CV_WRAP int64 getCounter() const + { + return counter; + } + + //! returns average FPS (frames per second) value. + CV_WRAP double getFPS() const + { + const double sec = getTimeSec(); + if (sec < DBL_EPSILON) + return 0.; + return counter / sec; + } + + //! returns average time in seconds + CV_WRAP double getAvgTimeSec() const + { + if (counter <= 0) + return 0.; + return getTimeSec() / counter; + } + + //! returns average time in milliseconds + CV_WRAP double getAvgTimeMilli() const + { + return getAvgTimeSec() * 1e3; + } + + //! resets internal values. + CV_WRAP void reset() + { + startTime = 0; + sumTime = 0; + counter = 0; + } + +private: + int64 counter; + int64 sumTime; + int64 startTime; +}; + +/** @brief output operator +@code +TickMeter tm; +tm.start(); +// do something ... +tm.stop(); +std::cout << tm; +@endcode +*/ + +static inline +std::ostream& operator << (std::ostream& out, const TickMeter& tm) +{ + return out << tm.getTimeSec() << "sec"; +} + +/** @brief Returns the number of CPU ticks. + +The function returns the current number of CPU ticks on some architectures (such as x86, x64, +PowerPC). On other platforms the function is equivalent to getTickCount. It can also be used for +very accurate time measurements, as well as for RNG initialization. Note that in case of multi-CPU +systems a thread, from which getCPUTickCount is called, can be suspended and resumed at another CPU +with its own counter. So, theoretically (and practically) the subsequent calls to the function do +not necessary return the monotonously increasing values. Also, since a modern CPU varies the CPU +frequency depending on the load, the number of CPU clocks spent in some code cannot be directly +converted to time units. Therefore, getTickCount is generally a preferable solution for measuring +execution time. + */ +CV_EXPORTS_W int64 getCPUTickCount(); + +/** @brief Returns true if the specified feature is supported by the host hardware. + +The function returns true if the host hardware supports the specified feature. When user calls +setUseOptimized(false), the subsequent calls to checkHardwareSupport() will return false until +setUseOptimized(true) is called. This way user can dynamically switch on and off the optimized code +in OpenCV. +@param feature The feature of interest, one of cv::CpuFeatures + */ +CV_EXPORTS_W bool checkHardwareSupport(int feature); + +/** @brief Returns feature name by ID + +Returns empty string if feature is not defined +*/ +CV_EXPORTS_W String getHardwareFeatureName(int feature); + +/** @brief Returns list of CPU features enabled during compilation. + +Returned value is a string containing space separated list of CPU features with following markers: + +- no markers - baseline features +- prefix `*` - features enabled in dispatcher +- suffix `?` - features enabled but not available in HW + +Example: `SSE SSE2 SSE3 *SSE4.1 *SSE4.2 *FP16 *AVX *AVX2 *AVX512-SKX?` +*/ +CV_EXPORTS_W std::string getCPUFeaturesLine(); + +/** @brief Returns the number of logical CPUs available for the process. + */ +CV_EXPORTS_W int getNumberOfCPUs(); + + +/** @brief Aligns a pointer to the specified number of bytes. + +The function returns the aligned pointer of the same type as the input pointer: +\f[\texttt{(_Tp*)(((size_t)ptr + n-1) & -n)}\f] +@param ptr Aligned pointer. +@param n Alignment size that must be a power of two. + */ +template static inline _Tp* alignPtr(_Tp* ptr, int n=(int)sizeof(_Tp)) +{ + CV_DbgAssert((n & (n - 1)) == 0); // n is a power of 2 + return (_Tp*)(((size_t)ptr + n-1) & -n); +} + +/** @brief Aligns a buffer size to the specified number of bytes. + +The function returns the minimum number that is greater than or equal to sz and is divisible by n : +\f[\texttt{(sz + n-1) & -n}\f] +@param sz Buffer size to align. +@param n Alignment size that must be a power of two. + */ +static inline size_t alignSize(size_t sz, int n) +{ + CV_DbgAssert((n & (n - 1)) == 0); // n is a power of 2 + return (sz + n-1) & -n; +} + +/** @brief Integer division with result round up. + +Use this function instead of `ceil((float)a / b)` expressions. + +@sa alignSize +*/ +static inline int divUp(int a, unsigned int b) +{ + CV_DbgAssert(a >= 0); + return (a + b - 1) / b; +} +/** @overload */ +static inline size_t divUp(size_t a, unsigned int b) +{ + return (a + b - 1) / b; +} + +/** @brief Round first value up to the nearest multiple of second value. + +Use this function instead of `ceil((float)a / b) * b` expressions. + +@sa divUp +*/ +static inline int roundUp(int a, unsigned int b) +{ + CV_DbgAssert(a >= 0); + return a + b - 1 - (a + b -1) % b; +} +/** @overload */ +static inline size_t roundUp(size_t a, unsigned int b) +{ + return a + b - 1 - (a + b - 1) % b; +} + +/** @brief Alignment check of passed values + +Usage: `isAligned(...)` + +@note Alignment(N) must be a power of 2 (2**k, 2^k) +*/ +template static inline +bool isAligned(const T& data) +{ + CV_StaticAssert((N & (N - 1)) == 0, ""); // power of 2 + return (((size_t)data) & (N - 1)) == 0; +} +/** @overload */ +template static inline +bool isAligned(const void* p1) +{ + return isAligned((size_t)p1); +} +/** @overload */ +template static inline +bool isAligned(const void* p1, const void* p2) +{ + return isAligned(((size_t)p1)|((size_t)p2)); +} +/** @overload */ +template static inline +bool isAligned(const void* p1, const void* p2, const void* p3) +{ + return isAligned(((size_t)p1)|((size_t)p2)|((size_t)p3)); +} +/** @overload */ +template static inline +bool isAligned(const void* p1, const void* p2, const void* p3, const void* p4) +{ + return isAligned(((size_t)p1)|((size_t)p2)|((size_t)p3)|((size_t)p4)); +} + +/** @brief Enables or disables the optimized code. + +The function can be used to dynamically turn on and off optimized dispatched code (code that uses SSE4.2, AVX/AVX2, +and other instructions on the platforms that support it). It sets a global flag that is further +checked by OpenCV functions. Since the flag is not checked in the inner OpenCV loops, it is only +safe to call the function on the very top level in your application where you can be sure that no +other OpenCV function is currently executed. + +By default, the optimized code is enabled unless you disable it in CMake. The current status can be +retrieved using useOptimized. +@param onoff The boolean flag specifying whether the optimized code should be used (onoff=true) +or not (onoff=false). + */ +CV_EXPORTS_W void setUseOptimized(bool onoff); + +/** @brief Returns the status of optimized code usage. + +The function returns true if the optimized code is enabled. Otherwise, it returns false. + */ +CV_EXPORTS_W bool useOptimized(); + +static inline size_t getElemSize(int type) { return (size_t)CV_ELEM_SIZE(type); } + +/////////////////////////////// Parallel Primitives ////////////////////////////////// + +/** @brief Base class for parallel data processors +*/ +class CV_EXPORTS ParallelLoopBody +{ +public: + virtual ~ParallelLoopBody(); + virtual void operator() (const Range& range) const = 0; +}; + +/** @brief Parallel data processor +*/ +CV_EXPORTS void parallel_for_(const Range& range, const ParallelLoopBody& body, double nstripes=-1.); + +class ParallelLoopBodyLambdaWrapper : public ParallelLoopBody +{ +private: + std::function m_functor; +public: + ParallelLoopBodyLambdaWrapper(std::function functor) : + m_functor(functor) + { } + + virtual void operator() (const cv::Range& range) const CV_OVERRIDE + { + m_functor(range); + } +}; + +inline void parallel_for_(const Range& range, std::function functor, double nstripes=-1.) +{ + parallel_for_(range, ParallelLoopBodyLambdaWrapper(functor), nstripes); +} + +/////////////////////////////// forEach method of cv::Mat //////////////////////////// +template inline +void Mat::forEach_impl(const Functor& operation) { + if (false) { + operation(*reinterpret_cast<_Tp*>(0), reinterpret_cast(0)); + // If your compiler fails in this line. + // Please check that your functor signature is + // (_Tp&, const int*) <- multi-dimensional + // or (_Tp&, void*) <- in case you don't need current idx. + } + + CV_Assert(!empty()); + CV_Assert(this->total() / this->size[this->dims - 1] <= INT_MAX); + const int LINES = static_cast(this->total() / this->size[this->dims - 1]); + + class PixelOperationWrapper :public ParallelLoopBody + { + public: + PixelOperationWrapper(Mat_<_Tp>* const frame, const Functor& _operation) + : mat(frame), op(_operation) {} + virtual ~PixelOperationWrapper(){} + // ! Overloaded virtual operator + // convert range call to row call. + virtual void operator()(const Range &range) const CV_OVERRIDE + { + const int DIMS = mat->dims; + const int COLS = mat->size[DIMS - 1]; + if (DIMS <= 2) { + for (int row = range.start; row < range.end; ++row) { + this->rowCall2(row, COLS); + } + } else { + std::vector idx(DIMS); /// idx is modified in this->rowCall + idx[DIMS - 2] = range.start - 1; + + for (int line_num = range.start; line_num < range.end; ++line_num) { + idx[DIMS - 2]++; + for (int i = DIMS - 2; i >= 0; --i) { + if (idx[i] >= mat->size[i]) { + idx[i - 1] += idx[i] / mat->size[i]; + idx[i] %= mat->size[i]; + continue; // carry-over; + } + else { + break; + } + } + this->rowCall(&idx[0], COLS, DIMS); + } + } + } + private: + Mat_<_Tp>* const mat; + const Functor op; + // ! Call operator for each elements in this row. + inline void rowCall(int* const idx, const int COLS, const int DIMS) const { + int &col = idx[DIMS - 1]; + col = 0; + _Tp* pixel = &(mat->template at<_Tp>(idx)); + + while (col < COLS) { + op(*pixel, const_cast(idx)); + pixel++; col++; + } + col = 0; + } + // ! Call operator for each elements in this row. 2d mat special version. + inline void rowCall2(const int row, const int COLS) const { + union Index{ + int body[2]; + operator const int*() const { + return reinterpret_cast(this); + } + int& operator[](const int i) { + return body[i]; + } + } idx = {{row, 0}}; + // Special union is needed to avoid + // "error: array subscript is above array bounds [-Werror=array-bounds]" + // when call the functor `op` such that access idx[3]. + + _Tp* pixel = &(mat->template at<_Tp>(idx)); + const _Tp* const pixel_end = pixel + COLS; + while(pixel < pixel_end) { + op(*pixel++, static_cast(idx)); + idx[1]++; + } + } + PixelOperationWrapper& operator=(const PixelOperationWrapper &) { + CV_Assert(false); + // We can not remove this implementation because Visual Studio warning C4822. + return *this; + } + }; + + parallel_for_(cv::Range(0, LINES), PixelOperationWrapper(reinterpret_cast*>(this), operation)); +} + +/////////////////////////// Synchronization Primitives /////////////////////////////// + +#if !defined(_M_CEE) +typedef std::recursive_mutex Mutex; +typedef std::lock_guard AutoLock; +#endif + + +/** @brief Designed for command line parsing + +The sample below demonstrates how to use CommandLineParser: +@code + CommandLineParser parser(argc, argv, keys); + parser.about("Application name v1.0.0"); + + if (parser.has("help")) + { + parser.printMessage(); + return 0; + } + + int N = parser.get("N"); + double fps = parser.get("fps"); + String path = parser.get("path"); + + use_time_stamp = parser.has("timestamp"); + + String img1 = parser.get(0); + String img2 = parser.get(1); + + int repeat = parser.get(2); + + if (!parser.check()) + { + parser.printErrors(); + return 0; + } +@endcode + +### Keys syntax + +The keys parameter is a string containing several blocks, each one is enclosed in curly braces and +describes one argument. Each argument contains three parts separated by the `|` symbol: + +-# argument names is a space-separated list of option synonyms (to mark argument as positional, prefix it with the `@` symbol) +-# default value will be used if the argument was not provided (can be empty) +-# help message (can be empty) + +For example: + +@code{.cpp} + const String keys = + "{help h usage ? | | print this message }" + "{@image1 | | image1 for compare }" + "{@image2 || image2 for compare }" + "{@repeat |1 | number }" + "{path |. | path to file }" + "{fps | -1.0 | fps for output video }" + "{N count |100 | count of objects }" + "{ts timestamp | | use time stamp }" + ; +} +@endcode + +Note that there are no default values for `help` and `timestamp` so we can check their presence using the `has()` method. +Arguments with default values are considered to be always present. Use the `get()` method in these cases to check their +actual value instead. + +String keys like `get("@image1")` return the empty string `""` by default - even with an empty default value. +Use the special `` default value to enforce that the returned string must not be empty. (like in `get("@image2")`) + +### Usage + +For the described keys: + +@code{.sh} + # Good call (3 positional parameters: image1, image2 and repeat; N is 200, ts is true) + $ ./app -N=200 1.png 2.jpg 19 -ts + + # Bad call + $ ./app -fps=aaa + ERRORS: + Parameter 'fps': can not convert: [aaa] to [double] +@endcode + */ +class CV_EXPORTS CommandLineParser +{ +public: + + /** @brief Constructor + + Initializes command line parser object + + @param argc number of command line arguments (from main()) + @param argv array of command line arguments (from main()) + @param keys string describing acceptable command line parameters (see class description for syntax) + */ + CommandLineParser(int argc, const char* const argv[], const String& keys); + + /** @brief Copy constructor */ + CommandLineParser(const CommandLineParser& parser); + + /** @brief Assignment operator */ + CommandLineParser& operator = (const CommandLineParser& parser); + + /** @brief Destructor */ + ~CommandLineParser(); + + /** @brief Returns application path + + This method returns the path to the executable from the command line (`argv[0]`). + + For example, if the application has been started with such a command: + @code{.sh} + $ ./bin/my-executable + @endcode + this method will return `./bin`. + */ + String getPathToApplication() const; + + /** @brief Access arguments by name + + Returns argument converted to selected type. If the argument is not known or can not be + converted to selected type, the error flag is set (can be checked with @ref check). + + For example, define: + @code{.cpp} + String keys = "{N count||}"; + @endcode + + Call: + @code{.sh} + $ ./my-app -N=20 + # or + $ ./my-app --count=20 + @endcode + + Access: + @code{.cpp} + int N = parser.get("N"); + @endcode + + @param name name of the argument + @param space_delete remove spaces from the left and right of the string + @tparam T the argument will be converted to this type if possible + + @note You can access positional arguments by their `@`-prefixed name: + @code{.cpp} + parser.get("@image"); + @endcode + */ + template + T get(const String& name, bool space_delete = true) const + { + T val = T(); + getByName(name, space_delete, ParamType::type, (void*)&val); + return val; + } + + /** @brief Access positional arguments by index + + Returns argument converted to selected type. Indexes are counted from zero. + + For example, define: + @code{.cpp} + String keys = "{@arg1||}{@arg2||}" + @endcode + + Call: + @code{.sh} + ./my-app abc qwe + @endcode + + Access arguments: + @code{.cpp} + String val_1 = parser.get(0); // returns "abc", arg1 + String val_2 = parser.get(1); // returns "qwe", arg2 + @endcode + + @param index index of the argument + @param space_delete remove spaces from the left and right of the string + @tparam T the argument will be converted to this type if possible + */ + template + T get(int index, bool space_delete = true) const + { + T val = T(); + getByIndex(index, space_delete, ParamType::type, (void*)&val); + return val; + } + + /** @brief Check if field was provided in the command line + + @param name argument name to check + */ + bool has(const String& name) const; + + /** @brief Check for parsing errors + + Returns false if error occurred while accessing the parameters (bad conversion, missing arguments, + etc.). Call @ref printErrors to print error messages list. + */ + bool check() const; + + /** @brief Set the about message + + The about message will be shown when @ref printMessage is called, right before arguments table. + */ + void about(const String& message); + + /** @brief Print help message + + This method will print standard help message containing the about message and arguments description. + + @sa about + */ + void printMessage() const; + + /** @brief Print list of errors occurred + + @sa check + */ + void printErrors() const; + +protected: + void getByName(const String& name, bool space_delete, Param type, void* dst) const; + void getByIndex(int index, bool space_delete, Param type, void* dst) const; + + struct Impl; + Impl* impl; +}; + +//! @} core_utils + +//! @cond IGNORED + +/////////////////////////////// AutoBuffer implementation //////////////////////////////////////// + +template inline +AutoBuffer<_Tp, fixed_size>::AutoBuffer() +{ + ptr = buf; + sz = fixed_size; +} + +template inline +AutoBuffer<_Tp, fixed_size>::AutoBuffer(size_t _size) +{ + ptr = buf; + sz = fixed_size; + allocate(_size); +} + +template inline +AutoBuffer<_Tp, fixed_size>::AutoBuffer(const AutoBuffer<_Tp, fixed_size>& abuf ) +{ + ptr = buf; + sz = fixed_size; + allocate(abuf.size()); + for( size_t i = 0; i < sz; i++ ) + ptr[i] = abuf.ptr[i]; +} + +template inline AutoBuffer<_Tp, fixed_size>& +AutoBuffer<_Tp, fixed_size>::operator = (const AutoBuffer<_Tp, fixed_size>& abuf) +{ + if( this != &abuf ) + { + deallocate(); + allocate(abuf.size()); + for( size_t i = 0; i < sz; i++ ) + ptr[i] = abuf.ptr[i]; + } + return *this; +} + +template inline +AutoBuffer<_Tp, fixed_size>::~AutoBuffer() +{ deallocate(); } + +template inline void +AutoBuffer<_Tp, fixed_size>::allocate(size_t _size) +{ + if(_size <= sz) + { + sz = _size; + return; + } + deallocate(); + sz = _size; + if(_size > fixed_size) + { + ptr = new _Tp[_size]; + } +} + +template inline void +AutoBuffer<_Tp, fixed_size>::deallocate() +{ + if( ptr != buf ) + { + delete[] ptr; + ptr = buf; + sz = fixed_size; + } +} + +template inline void +AutoBuffer<_Tp, fixed_size>::resize(size_t _size) +{ + if(_size <= sz) + { + sz = _size; + return; + } + size_t i, prevsize = sz, minsize = MIN(prevsize, _size); + _Tp* prevptr = ptr; + + ptr = _size > fixed_size ? new _Tp[_size] : buf; + sz = _size; + + if( ptr != prevptr ) + for( i = 0; i < minsize; i++ ) + ptr[i] = prevptr[i]; + for( i = prevsize; i < _size; i++ ) + ptr[i] = _Tp(); + + if( prevptr != buf ) + delete[] prevptr; +} + +template inline size_t +AutoBuffer<_Tp, fixed_size>::size() const +{ return sz; } + +//! @endcond + + +// Basic Node class for tree building +template +class CV_EXPORTS Node +{ +public: + Node() + { + m_pParent = 0; + } + Node(OBJECT& payload) : m_payload(payload) + { + m_pParent = 0; + } + ~Node() + { + removeChilds(); + if (m_pParent) + { + int idx = m_pParent->findChild(this); + if (idx >= 0) + m_pParent->m_childs.erase(m_pParent->m_childs.begin() + idx); + } + } + + Node* findChild(OBJECT& payload) const + { + for(size_t i = 0; i < this->m_childs.size(); i++) + { + if(this->m_childs[i]->m_payload == payload) + return this->m_childs[i]; + } + return NULL; + } + + int findChild(Node *pNode) const + { + for (size_t i = 0; i < this->m_childs.size(); i++) + { + if(this->m_childs[i] == pNode) + return (int)i; + } + return -1; + } + + void addChild(Node *pNode) + { + if(!pNode) + return; + + CV_Assert(pNode->m_pParent == 0); + pNode->m_pParent = this; + this->m_childs.push_back(pNode); + } + + void removeChilds() + { + for(size_t i = 0; i < m_childs.size(); i++) + { + m_childs[i]->m_pParent = 0; // avoid excessive parent vector trimming + delete m_childs[i]; + } + m_childs.clear(); + } + + int getDepth() + { + int count = 0; + Node *pParent = m_pParent; + while(pParent) count++, pParent = pParent->m_pParent; + return count; + } + +public: + OBJECT m_payload; + Node* m_pParent; + std::vector*> m_childs; +}; + + +namespace samples { + +//! @addtogroup core_utils_samples +// This section describes utility functions for OpenCV samples. +// +// @note Implementation of these utilities is not thread-safe. +// +//! @{ + +/** @brief Try to find requested data file + +Search directories: + +1. Directories passed via `addSamplesDataSearchPath()` +2. OPENCV_SAMPLES_DATA_PATH_HINT environment variable +3. OPENCV_SAMPLES_DATA_PATH environment variable + If parameter value is not empty and nothing is found then stop searching. +4. Detects build/install path based on: + a. current working directory (CWD) + b. and/or binary module location (opencv_core/opencv_world, doesn't work with static linkage) +5. Scan `/{,data,samples/data}` directories if build directory is detected or the current directory is in source tree. +6. Scan `/share/OpenCV` directory if install directory is detected. + +@see cv::utils::findDataFile + +@param relative_path Relative path to data file +@param required Specify "file not found" handling. + If true, function prints information message and raises cv::Exception. + If false, function returns empty result +@param silentMode Disables messages +@return Returns path (absolute or relative to the current directory) or empty string if file is not found +*/ +CV_EXPORTS_W cv::String findFile(const cv::String& relative_path, bool required = true, bool silentMode = false); + +CV_EXPORTS_W cv::String findFileOrKeep(const cv::String& relative_path, bool silentMode = false); + +inline cv::String findFileOrKeep(const cv::String& relative_path, bool silentMode) +{ + cv::String res = findFile(relative_path, false, silentMode); + if (res.empty()) + return relative_path; + return res; +} + +/** @brief Override search data path by adding new search location + +Use this only to override default behavior +Passed paths are used in LIFO order. + +@param path Path to used samples data +*/ +CV_EXPORTS_W void addSamplesDataSearchPath(const cv::String& path); + +/** @brief Append samples search data sub directory + +General usage is to add OpenCV modules name (`/modules//samples/data` -> `/samples/data` + `modules//samples/data`). +Passed subdirectories are used in LIFO order. + +@param subdir samples data sub directory +*/ +CV_EXPORTS_W void addSamplesDataSearchSubDirectory(const cv::String& subdir); + +//! @} +} // namespace samples + +namespace utils { + +CV_EXPORTS int getThreadID(); + +} // namespace + +} //namespace cv + +#ifdef CV_COLLECT_IMPL_DATA +#include "opencv2/core/utils/instrumentation.hpp" +#else +/// Collect implementation data on OpenCV function call. Requires ENABLE_IMPL_COLLECTION build option. +#define CV_IMPL_ADD(impl) +#endif + +#endif //OPENCV_CORE_UTILITY_H diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/utils/allocator_stats.hpp b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/utils/allocator_stats.hpp new file mode 100644 index 0000000..79e9338 --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/utils/allocator_stats.hpp @@ -0,0 +1,29 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#ifndef OPENCV_CORE_ALLOCATOR_STATS_HPP +#define OPENCV_CORE_ALLOCATOR_STATS_HPP + +#include "../cvdef.h" + +namespace cv { namespace utils { + +class AllocatorStatisticsInterface +{ +protected: + AllocatorStatisticsInterface() {} + virtual ~AllocatorStatisticsInterface() {} +public: + virtual uint64_t getCurrentUsage() const = 0; + virtual uint64_t getTotalUsage() const = 0; + virtual uint64_t getNumberOfAllocations() const = 0; + virtual uint64_t getPeakUsage() const = 0; + + /** set peak usage = current usage */ + virtual void resetPeakUsage() = 0; +}; + +}} // namespace + +#endif // OPENCV_CORE_ALLOCATOR_STATS_HPP diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/utils/allocator_stats.impl.hpp b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/utils/allocator_stats.impl.hpp new file mode 100644 index 0000000..eb5ecde --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/utils/allocator_stats.impl.hpp @@ -0,0 +1,158 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#ifndef OPENCV_CORE_ALLOCATOR_STATS_IMPL_HPP +#define OPENCV_CORE_ALLOCATOR_STATS_IMPL_HPP + +#include "./allocator_stats.hpp" + +//#define OPENCV_DISABLE_ALLOCATOR_STATS + +#ifdef CV_CXX11 + +#include + +#ifndef OPENCV_ALLOCATOR_STATS_COUNTER_TYPE +#if defined(__GNUC__) && (\ + (defined(__SIZEOF_POINTER__) && __SIZEOF_POINTER__ == 4) || \ + (defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_4) && !defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8)) \ + ) +#define OPENCV_ALLOCATOR_STATS_COUNTER_TYPE int +#endif +#endif + +#ifndef OPENCV_ALLOCATOR_STATS_COUNTER_TYPE +#define OPENCV_ALLOCATOR_STATS_COUNTER_TYPE long long +#endif + +#else // CV_CXX11 + +#ifndef OPENCV_ALLOCATOR_STATS_COUNTER_TYPE +#define OPENCV_ALLOCATOR_STATS_COUNTER_TYPE int // CV_XADD supports int only +#endif + +#endif // CV_CXX11 + +namespace cv { namespace utils { + +#ifdef CV__ALLOCATOR_STATS_LOG +namespace { +#endif + +class AllocatorStatistics : public AllocatorStatisticsInterface +{ +#ifdef OPENCV_DISABLE_ALLOCATOR_STATS + +public: + AllocatorStatistics() {} + ~AllocatorStatistics() CV_OVERRIDE {} + + uint64_t getCurrentUsage() const CV_OVERRIDE { return 0; } + uint64_t getTotalUsage() const CV_OVERRIDE { return 0; } + uint64_t getNumberOfAllocations() const CV_OVERRIDE { return 0; } + uint64_t getPeakUsage() const CV_OVERRIDE { return 0; } + + /** set peak usage = current usage */ + void resetPeakUsage() CV_OVERRIDE {}; + + void onAllocate(size_t /*sz*/) {} + void onFree(size_t /*sz*/) {} + +#elif defined(CV_CXX11) + +protected: + typedef OPENCV_ALLOCATOR_STATS_COUNTER_TYPE counter_t; + std::atomic curr, total, total_allocs, peak; +public: + AllocatorStatistics() {} + ~AllocatorStatistics() CV_OVERRIDE {} + + uint64_t getCurrentUsage() const CV_OVERRIDE { return (uint64_t)curr.load(); } + uint64_t getTotalUsage() const CV_OVERRIDE { return (uint64_t)total.load(); } + uint64_t getNumberOfAllocations() const CV_OVERRIDE { return (uint64_t)total_allocs.load(); } + uint64_t getPeakUsage() const CV_OVERRIDE { return (uint64_t)peak.load(); } + + /** set peak usage = current usage */ + void resetPeakUsage() CV_OVERRIDE { peak.store(curr.load()); } + + // Controller interface + void onAllocate(size_t sz) + { +#ifdef CV__ALLOCATOR_STATS_LOG + CV__ALLOCATOR_STATS_LOG(cv::format("allocate: %lld (curr=%lld)", (long long int)sz, (long long int)curr.load())); +#endif + + counter_t new_curr = curr.fetch_add((counter_t)sz) + (counter_t)sz; + + // peak = std::max((uint64_t)peak, new_curr); + auto prev_peak = peak.load(); + while (prev_peak < new_curr) + { + if (peak.compare_exchange_weak(prev_peak, new_curr)) + break; + } + // end of peak = max(...) + + total += (counter_t)sz; + total_allocs++; + } + void onFree(size_t sz) + { +#ifdef CV__ALLOCATOR_STATS_LOG + CV__ALLOCATOR_STATS_LOG(cv::format("free: %lld (curr=%lld)", (long long int)sz, (long long int)curr.load())); +#endif + curr -= (counter_t)sz; + } + +#else // non C++11 + +protected: + typedef OPENCV_ALLOCATOR_STATS_COUNTER_TYPE counter_t; + volatile counter_t curr, total, total_allocs, peak; // overflow is possible, CV_XADD operates with 'int' only +public: + AllocatorStatistics() + : curr(0), total(0), total_allocs(0), peak(0) + {} + ~AllocatorStatistics() CV_OVERRIDE {} + + uint64_t getCurrentUsage() const CV_OVERRIDE { return (uint64_t)curr; } + uint64_t getTotalUsage() const CV_OVERRIDE { return (uint64_t)total; } + uint64_t getNumberOfAllocations() const CV_OVERRIDE { return (uint64_t)total_allocs; } + uint64_t getPeakUsage() const CV_OVERRIDE { return (uint64_t)peak; } + + void resetPeakUsage() CV_OVERRIDE { peak = curr; } + + // Controller interface + void onAllocate(size_t sz) + { +#ifdef CV__ALLOCATOR_STATS_LOG + CV__ALLOCATOR_STATS_LOG(cv::format("allocate: %lld (curr=%lld)", (long long int)sz, (long long int)curr)); +#endif + + counter_t new_curr = (counter_t)CV_XADD(&curr, (counter_t)sz) + (counter_t)sz; + + peak = std::max((counter_t)peak, new_curr); // non-thread safe + + //CV_XADD(&total, (uint64_t)sz); // overflow with int, non-reliable... + total += sz; + + CV_XADD(&total_allocs, (counter_t)1); + } + void onFree(size_t sz) + { +#ifdef CV__ALLOCATOR_STATS_LOG + CV__ALLOCATOR_STATS_LOG(cv::format("free: %lld (curr=%lld)", (long long int)sz, (long long int)curr)); +#endif + CV_XADD(&curr, (counter_t)-sz); + } +#endif +}; + +#ifdef CV__ALLOCATOR_STATS_LOG +} // namespace +#endif + +}} // namespace + +#endif // OPENCV_CORE_ALLOCATOR_STATS_IMPL_HPP diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/utils/filesystem.hpp b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/utils/filesystem.hpp new file mode 100644 index 0000000..a98d220 --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/utils/filesystem.hpp @@ -0,0 +1,82 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#ifndef OPENCV_UTILS_FILESYSTEM_HPP +#define OPENCV_UTILS_FILESYSTEM_HPP + +namespace cv { namespace utils { namespace fs { + + +CV_EXPORTS bool exists(const cv::String& path); +CV_EXPORTS bool isDirectory(const cv::String& path); + +CV_EXPORTS void remove_all(const cv::String& path); + + +CV_EXPORTS cv::String getcwd(); + +/** @brief Converts path p to a canonical absolute path + * Symlinks are processed if there is support for them on running platform. + * + * @param path input path. Target file/directory should exist. + */ +CV_EXPORTS cv::String canonical(const cv::String& path); + +/** Join path components */ +CV_EXPORTS cv::String join(const cv::String& base, const cv::String& path); + +/** Get parent directory */ +CV_EXPORTS cv::String getParent(const cv::String &path); +CV_EXPORTS std::wstring getParent(const std::wstring& path); + +/** + * Generate a list of all files that match the globbing pattern. + * + * Result entries are prefixed by base directory path. + * + * @param directory base directory + * @param pattern filter pattern (based on '*'/'?' symbols). Use empty string to disable filtering and return all results + * @param[out] result result of globing. + * @param recursive scan nested directories too + * @param includeDirectories include directories into results list + */ +CV_EXPORTS void glob(const cv::String& directory, const cv::String& pattern, + CV_OUT std::vector& result, + bool recursive = false, bool includeDirectories = false); + +/** + * Generate a list of all files that match the globbing pattern. + * + * @param directory base directory + * @param pattern filter pattern (based on '*'/'?' symbols). Use empty string to disable filtering and return all results + * @param[out] result globbing result with relative paths from base directory + * @param recursive scan nested directories too + * @param includeDirectories include directories into results list + */ +CV_EXPORTS void glob_relative(const cv::String& directory, const cv::String& pattern, + CV_OUT std::vector& result, + bool recursive = false, bool includeDirectories = false); + + +CV_EXPORTS bool createDirectory(const cv::String& path); +CV_EXPORTS bool createDirectories(const cv::String& path); + +#ifdef __OPENCV_BUILD +// TODO +//CV_EXPORTS cv::String getTempDirectory(); + +/** + * @brief Returns directory to store OpenCV cache files + * Create sub-directory in common OpenCV cache directory if it doesn't exist. + * @param sub_directory_name name of sub-directory. NULL or "" value asks to return root cache directory. + * @param configuration_name optional name of configuration parameter name which overrides default behavior. + * @return Path to cache directory. Returns empty string if cache directories support is not available. Returns "disabled" if cache disabled by user. + */ +CV_EXPORTS cv::String getCacheDirectory(const char* sub_directory_name, const char* configuration_name = NULL); + +#endif + +}}} // namespace + +#endif // OPENCV_UTILS_FILESYSTEM_HPP diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/utils/instrumentation.hpp b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/utils/instrumentation.hpp new file mode 100644 index 0000000..3639867 --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/utils/instrumentation.hpp @@ -0,0 +1,125 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#ifndef OPENCV_UTILS_INSTR_HPP +#define OPENCV_UTILS_INSTR_HPP + +#include +#include + +namespace cv { + +//! @addtogroup core_utils +//! @{ + +#ifdef CV_COLLECT_IMPL_DATA +CV_EXPORTS void setImpl(int flags); // set implementation flags and reset storage arrays +CV_EXPORTS void addImpl(int flag, const char* func = 0); // add implementation and function name to storage arrays +// Get stored implementation flags and functions names arrays +// Each implementation entry correspond to function name entry, so you can find which implementation was executed in which function +CV_EXPORTS int getImpl(std::vector &impl, std::vector &funName); + +CV_EXPORTS bool useCollection(); // return implementation collection state +CV_EXPORTS void setUseCollection(bool flag); // set implementation collection state + +#define CV_IMPL_PLAIN 0x01 // native CPU OpenCV implementation +#define CV_IMPL_OCL 0x02 // OpenCL implementation +#define CV_IMPL_IPP 0x04 // IPP implementation +#define CV_IMPL_MT 0x10 // multithreaded implementation + +#undef CV_IMPL_ADD +#define CV_IMPL_ADD(impl) \ + if(cv::useCollection()) \ + { \ + cv::addImpl(impl, CV_Func); \ + } +#endif + +// Instrumentation external interface +namespace instr +{ + +#if !defined OPENCV_ABI_CHECK + +enum TYPE +{ + TYPE_GENERAL = 0, // OpenCV API function, e.g. exported function + TYPE_MARKER, // Information marker + TYPE_WRAPPER, // Wrapper function for implementation + TYPE_FUN, // Simple function call +}; + +enum IMPL +{ + IMPL_PLAIN = 0, + IMPL_IPP, + IMPL_OPENCL, +}; + +struct NodeDataTls +{ + NodeDataTls() + { + m_ticksTotal = 0; + } + uint64 m_ticksTotal; +}; + +class CV_EXPORTS NodeData +{ +public: + NodeData(const char* funName = 0, const char* fileName = NULL, int lineNum = 0, void* retAddress = NULL, bool alwaysExpand = false, cv::instr::TYPE instrType = TYPE_GENERAL, cv::instr::IMPL implType = IMPL_PLAIN); + NodeData(NodeData &ref); + ~NodeData(); + NodeData& operator=(const NodeData&); + + cv::String m_funName; + cv::instr::TYPE m_instrType; + cv::instr::IMPL m_implType; + const char* m_fileName; + int m_lineNum; + void* m_retAddress; + bool m_alwaysExpand; + bool m_funError; + + volatile int m_counter; + volatile uint64 m_ticksTotal; + TLSDataAccumulator m_tls; + int m_threads; + + // No synchronization + double getTotalMs() const { return ((double)m_ticksTotal / cv::getTickFrequency()) * 1000; } + double getMeanMs() const { return (((double)m_ticksTotal/m_counter) / cv::getTickFrequency()) * 1000; } +}; +bool operator==(const NodeData& lhs, const NodeData& rhs); + +typedef Node InstrNode; + +CV_EXPORTS InstrNode* getTrace(); + +#endif // !defined OPENCV_ABI_CHECK + + +CV_EXPORTS bool useInstrumentation(); +CV_EXPORTS void setUseInstrumentation(bool flag); +CV_EXPORTS void resetTrace(); + +enum FLAGS +{ + FLAGS_NONE = 0, + FLAGS_MAPPING = 0x01, + FLAGS_EXPAND_SAME_NAMES = 0x02, +}; + +CV_EXPORTS void setFlags(FLAGS modeFlags); +static inline void setFlags(int modeFlags) { setFlags((FLAGS)modeFlags); } +CV_EXPORTS FLAGS getFlags(); + +} // namespace instr + +//! @} + +} // namespace + +#endif // OPENCV_UTILS_TLS_HPP diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/utils/logger.defines.hpp b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/utils/logger.defines.hpp new file mode 100644 index 0000000..7d73f02 --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/utils/logger.defines.hpp @@ -0,0 +1,42 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#ifndef OPENCV_LOGGER_DEFINES_HPP +#define OPENCV_LOGGER_DEFINES_HPP + +//! @addtogroup core_logging +//! @{ + +// Supported logging levels and their semantic +#define CV_LOG_LEVEL_SILENT 0 //!< for using in setLogLevel() call +#define CV_LOG_LEVEL_FATAL 1 //!< Fatal (critical) error (unrecoverable internal error) +#define CV_LOG_LEVEL_ERROR 2 //!< Error message +#define CV_LOG_LEVEL_WARN 3 //!< Warning message +#define CV_LOG_LEVEL_INFO 4 //!< Info message +#define CV_LOG_LEVEL_DEBUG 5 //!< Debug message. Disabled in the "Release" build. +#define CV_LOG_LEVEL_VERBOSE 6 //!< Verbose (trace) messages. Requires verbosity level. Disabled in the "Release" build. + +namespace cv { +namespace utils { +namespace logging { + +//! Supported logging levels and their semantic +enum LogLevel { + LOG_LEVEL_SILENT = 0, //!< for using in setLogVevel() call + LOG_LEVEL_FATAL = 1, //!< Fatal (critical) error (unrecoverable internal error) + LOG_LEVEL_ERROR = 2, //!< Error message + LOG_LEVEL_WARNING = 3, //!< Warning message + LOG_LEVEL_INFO = 4, //!< Info message + LOG_LEVEL_DEBUG = 5, //!< Debug message. Disabled in the "Release" build. + LOG_LEVEL_VERBOSE = 6, //!< Verbose (trace) messages. Requires verbosity level. Disabled in the "Release" build. +#ifndef CV_DOXYGEN + ENUM_LOG_LEVEL_FORCE_INT = INT_MAX +#endif +}; + +}}} // namespace + +//! @} + +#endif // OPENCV_LOGGER_DEFINES_HPP diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/utils/logger.hpp b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/utils/logger.hpp new file mode 100644 index 0000000..accb860 --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/utils/logger.hpp @@ -0,0 +1,218 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#ifndef OPENCV_LOGGER_HPP +#define OPENCV_LOGGER_HPP + +#include +#include +#include // INT_MAX + +#include "logger.defines.hpp" +#include "logtag.hpp" + +namespace cv { +namespace utils { +namespace logging { + +//! @addtogroup core_logging +//! @{ + +/** Set global logging level +@return previous logging level +*/ +CV_EXPORTS LogLevel setLogLevel(LogLevel logLevel); +/** Get global logging level */ +CV_EXPORTS LogLevel getLogLevel(); + +CV_EXPORTS void registerLogTag(cv::utils::logging::LogTag* plogtag); + +CV_EXPORTS void setLogTagLevel(const char* tag, cv::utils::logging::LogLevel level); + +CV_EXPORTS cv::utils::logging::LogLevel getLogTagLevel(const char* tag); + +namespace internal { + +/** Get global log tag */ +CV_EXPORTS cv::utils::logging::LogTag* getGlobalLogTag(); + +/** Write log message */ +CV_EXPORTS void writeLogMessage(LogLevel logLevel, const char* message); + +/** Write log message */ +CV_EXPORTS void writeLogMessageEx(LogLevel logLevel, const char* tag, const char* file, int line, const char* func, const char* message); + +} // namespace + +struct LogTagAuto + : public LogTag +{ + inline LogTagAuto(const char* _name, LogLevel _level) + : LogTag(_name, _level) + { + registerLogTag(this); + } +}; + +/** + * \def CV_LOG_STRIP_LEVEL + * + * Define CV_LOG_STRIP_LEVEL=CV_LOG_LEVEL_[DEBUG|INFO|WARN|ERROR|FATAL|SILENT] to compile out anything at that and before that logging level + */ +#ifndef CV_LOG_STRIP_LEVEL +# if defined NDEBUG +# define CV_LOG_STRIP_LEVEL CV_LOG_LEVEL_DEBUG +# else +# define CV_LOG_STRIP_LEVEL CV_LOG_LEVEL_VERBOSE +# endif +#endif + +#define CV_LOGTAG_PTR_CAST(expr) static_cast(expr) + +// CV_LOGTAG_EXPAND_NAME is intended to be re-defined (undef and then define again) +// to allows logging users to use a shorter name argument when calling +// CV_LOG_WITH_TAG or its related macros such as CV_LOG_INFO. +// +// This macro is intended to modify the tag argument as a string (token), via +// preprocessor token pasting or metaprogramming techniques. A typical usage +// is to apply a prefix, such as +// ...... #define CV_LOGTAG_EXPAND_NAME(tag) cv_logtag_##tag +// +// It is permitted to re-define to a hard-coded expression, ignoring the tag. +// This would work identically like the CV_LOGTAG_FALLBACK macro. +// +// Important: When the logging macro is called with tag being NULL, a user-defined +// CV_LOGTAG_EXPAND_NAME may expand it into cv_logtag_0, cv_logtag_NULL, or +// cv_logtag_nullptr. Use with care. Also be mindful of C++ symbol redefinitions. +// +// If there is significant amount of logging code with tag being NULL, it is +// recommended to use (re-define) CV_LOGTAG_FALLBACK to inject locally a default +// tag at the beginning of a compilation unit, to minimize lines of code changes. +// +#define CV_LOGTAG_EXPAND_NAME(tag) tag + +// CV_LOGTAG_FALLBACK is intended to be re-defined (undef and then define again) +// by any other compilation units to provide a log tag when the logging statement +// does not specify one. The macro needs to expand into a C++ expression that can +// be static_cast into (cv::utils::logging::LogTag*). Null (nullptr) is permitted. +#define CV_LOGTAG_FALLBACK nullptr + +// CV_LOGTAG_GLOBAL is the tag used when a log tag is not specified in the logging +// statement nor the compilation unit. The macro needs to expand into a C++ +// expression that can be static_cast into (cv::utils::logging::LogTag*). Must be +// non-null. Do not re-define. +#define CV_LOGTAG_GLOBAL cv::utils::logging::internal::getGlobalLogTag() + +#define CV_LOG_WITH_TAG(tag, msgLevel, extra_check0, extra_check1, ...) \ + for(;;) { \ + extra_check0; \ + const auto cv_temp_msglevel = (cv::utils::logging::LogLevel)(msgLevel); \ + if (cv_temp_msglevel >= (CV_LOG_STRIP_LEVEL)) break; \ + auto cv_temp_logtagptr = CV_LOGTAG_PTR_CAST(CV_LOGTAG_EXPAND_NAME(tag)); \ + if (!cv_temp_logtagptr) cv_temp_logtagptr = CV_LOGTAG_PTR_CAST(CV_LOGTAG_FALLBACK); \ + if (!cv_temp_logtagptr) cv_temp_logtagptr = CV_LOGTAG_PTR_CAST(CV_LOGTAG_GLOBAL); \ + if (cv_temp_logtagptr && (cv_temp_msglevel > cv_temp_logtagptr->level)) break; \ + extra_check1; \ + std::stringstream cv_temp_logstream; \ + cv_temp_logstream << __VA_ARGS__; \ + cv::utils::logging::internal::writeLogMessageEx( \ + cv_temp_msglevel, \ + (cv_temp_logtagptr ? cv_temp_logtagptr->name : nullptr), \ + __FILE__, \ + __LINE__, \ + CV_Func, \ + cv_temp_logstream.str().c_str()); \ + break; \ + } + +#define CV_LOG_FATAL(tag, ...) CV_LOG_WITH_TAG(tag, cv::utils::logging::LOG_LEVEL_FATAL, , , __VA_ARGS__) +#define CV_LOG_ERROR(tag, ...) CV_LOG_WITH_TAG(tag, cv::utils::logging::LOG_LEVEL_ERROR, , , __VA_ARGS__) +#define CV_LOG_WARNING(tag, ...) CV_LOG_WITH_TAG(tag, cv::utils::logging::LOG_LEVEL_WARNING, , , __VA_ARGS__) +#define CV_LOG_INFO(tag, ...) CV_LOG_WITH_TAG(tag, cv::utils::logging::LOG_LEVEL_INFO, , , __VA_ARGS__) +#define CV_LOG_DEBUG(tag, ...) CV_LOG_WITH_TAG(tag, cv::utils::logging::LOG_LEVEL_DEBUG, , , __VA_ARGS__) +#define CV_LOG_VERBOSE(tag, v, ...) CV_LOG_WITH_TAG(tag, (cv::utils::logging::LOG_LEVEL_VERBOSE + (int)(v)), , , __VA_ARGS__) + +#if CV_LOG_STRIP_LEVEL <= CV_LOG_LEVEL_INFO +#undef CV_LOG_INFO +#define CV_LOG_INFO(tag, ...) +#endif + +#if CV_LOG_STRIP_LEVEL <= CV_LOG_LEVEL_DEBUG +#undef CV_LOG_DEBUG +#define CV_LOG_DEBUG(tag, ...) +#endif + +#if CV_LOG_STRIP_LEVEL <= CV_LOG_LEVEL_VERBOSE +#undef CV_LOG_VERBOSE +#define CV_LOG_VERBOSE(tag, v, ...) +#endif + +//! @cond IGNORED +#define CV__LOG_ONCE_CHECK_PRE \ + static bool _cv_log_once_ ## __LINE__ = false; \ + if (_cv_log_once_ ## __LINE__) break; + +#define CV__LOG_ONCE_CHECK_POST \ + _cv_log_once_ ## __LINE__ = true; + +#define CV__LOG_IF_CHECK(logging_cond) \ + if (!(logging_cond)) break; + +//! @endcond + + +// CV_LOG_ONCE_XXX macros + +#define CV_LOG_ONCE_ERROR(tag, ...) CV_LOG_WITH_TAG(tag, cv::utils::logging::LOG_LEVEL_ERROR, CV__LOG_ONCE_CHECK_PRE, CV__LOG_ONCE_CHECK_POST, __VA_ARGS__) +#define CV_LOG_ONCE_WARNING(tag, ...) CV_LOG_WITH_TAG(tag, cv::utils::logging::LOG_LEVEL_WARNING, CV__LOG_ONCE_CHECK_PRE, CV__LOG_ONCE_CHECK_POST, __VA_ARGS__) +#define CV_LOG_ONCE_INFO(tag, ...) CV_LOG_WITH_TAG(tag, cv::utils::logging::LOG_LEVEL_INFO, CV__LOG_ONCE_CHECK_PRE, CV__LOG_ONCE_CHECK_POST, __VA_ARGS__) +#define CV_LOG_ONCE_DEBUG(tag, ...) CV_LOG_WITH_TAG(tag, cv::utils::logging::LOG_LEVEL_DEBUG, CV__LOG_ONCE_CHECK_PRE, CV__LOG_ONCE_CHECK_POST, __VA_ARGS__) +#define CV_LOG_ONCE_VERBOSE(tag, v, ...) CV_LOG_WITH_TAG(tag, (cv::utils::logging::LOG_LEVEL_VERBOSE + (int)(v)), CV__LOG_ONCE_CHECK_PRE, CV__LOG_ONCE_CHECK_POST, __VA_ARGS__) + +#if CV_LOG_STRIP_LEVEL <= CV_LOG_LEVEL_INFO +#undef CV_LOG_ONCE_INFO +#define CV_LOG_ONCE_INFO(tag, ...) +#endif + +#if CV_LOG_STRIP_LEVEL <= CV_LOG_LEVEL_DEBUG +#undef CV_LOG_ONCE_DEBUG +#define CV_LOG_ONCE_DEBUG(tag, ...) +#endif + +#if CV_LOG_STRIP_LEVEL <= CV_LOG_LEVEL_VERBOSE +#undef CV_LOG_ONCE_VERBOSE +#define CV_LOG_ONCE_VERBOSE(tag, v, ...) +#endif + + +// CV_LOG_IF_XXX macros + +#define CV_LOG_IF_FATAL(tag, logging_cond, ...) CV_LOG_WITH_TAG(tag, cv::utils::logging::LOG_LEVEL_FATAL, , CV__LOG_IF_CHECK(logging_cond), __VA_ARGS__) +#define CV_LOG_IF_ERROR(tag, logging_cond, ...) CV_LOG_WITH_TAG(tag, cv::utils::logging::LOG_LEVEL_ERROR, , CV__LOG_IF_CHECK(logging_cond), __VA_ARGS__) +#define CV_LOG_IF_WARNING(tag, logging_cond, ...) CV_LOG_WITH_TAG(tag, cv::utils::logging::LOG_LEVEL_WARNING, , CV__LOG_IF_CHECK(logging_cond), __VA_ARGS__) +#define CV_LOG_IF_INFO(tag, logging_cond, ...) CV_LOG_WITH_TAG(tag, cv::utils::logging::LOG_LEVEL_INFO, , CV__LOG_IF_CHECK(logging_cond), __VA_ARGS__) +#define CV_LOG_IF_DEBUG(tag, logging_cond, ...) CV_LOG_WITH_TAG(tag, cv::utils::logging::LOG_LEVEL_DEBUG, , CV__LOG_IF_CHECK(logging_cond), __VA_ARGS__) +#define CV_LOG_IF_VERBOSE(tag, v, logging_cond, ...) CV_LOG_WITH_TAG(tag, (cv::utils::logging::LOG_LEVEL_VERBOSE + (int)(v)), , CV__LOG_IF_CHECK(logging_cond), __VA_ARGS__) + +#if CV_LOG_STRIP_LEVEL <= CV_LOG_LEVEL_INFO +#undef CV_LOG_IF_INFO +#define CV_LOG_IF_INFO(tag, logging_cond, ...) +#endif + +#if CV_LOG_STRIP_LEVEL <= CV_LOG_LEVEL_DEBUG +#undef CV_LOG_IF_DEBUG +#define CV_LOG_IF_DEBUG(tag, logging_cond, ...) +#endif + +#if CV_LOG_STRIP_LEVEL <= CV_LOG_LEVEL_VERBOSE +#undef CV_LOG_IF_VERBOSE +#define CV_LOG_IF_VERBOSE(tag, v, logging_cond, ...) +#endif + + +//! @} + +}}} // namespace + +#endif // OPENCV_LOGGER_HPP diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/utils/logtag.hpp b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/utils/logtag.hpp new file mode 100644 index 0000000..4089720 --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/utils/logtag.hpp @@ -0,0 +1,28 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#ifndef OPENCV_CORE_LOGTAG_HPP +#define OPENCV_CORE_LOGTAG_HPP + +#include "opencv2/core/cvstd.hpp" +#include "logger.defines.hpp" + +namespace cv { +namespace utils { +namespace logging { + +struct LogTag +{ + const char* name; + LogLevel level; + + inline LogTag(const char* _name, LogLevel _level) + : name(_name) + , level(_level) + {} +}; + +}}} + +#endif diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/utils/tls.hpp b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/utils/tls.hpp new file mode 100644 index 0000000..697a7b0 --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/utils/tls.hpp @@ -0,0 +1,233 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#ifndef OPENCV_UTILS_TLS_HPP +#define OPENCV_UTILS_TLS_HPP + +#include + +namespace cv { + +//! @addtogroup core_utils +//! @{ + +namespace details { class TlsStorage; } + +/** TLS container base implementation + * + * Don't use directly. + * + * @sa TLSData, TLSDataAccumulator templates + */ +class CV_EXPORTS TLSDataContainer +{ +protected: + TLSDataContainer(); + virtual ~TLSDataContainer(); + + /// @deprecated use detachData() instead + void gatherData(std::vector &data) const; + /// get TLS data and detach all data from threads (similar to cleanup() call) + void detachData(std::vector& data); + + void* getData() const; + void release(); + +protected: + virtual void* createDataInstance() const = 0; + virtual void deleteDataInstance(void* pData) const = 0; + +private: + int key_; + + friend class cv::details::TlsStorage; // core/src/system.cpp + +public: + void cleanup(); //!< Release created TLS data container objects. It is similar to release() call, but it keeps TLS container valid. + +private: + // Disable copy/assign (noncopyable pattern) + TLSDataContainer(TLSDataContainer &) = delete; + TLSDataContainer& operator =(const TLSDataContainer &) = delete; +}; + + +/** @brief Simple TLS data class + * + * @sa TLSDataAccumulator + */ +template +class TLSData : protected TLSDataContainer +{ +public: + inline TLSData() {} + inline ~TLSData() { release(); } + + inline T* get() const { return (T*)getData(); } //!< Get data associated with key + inline T& getRef() const { T* ptr = (T*)getData(); CV_DbgAssert(ptr); return *ptr; } //!< Get data associated with key + + /// Release associated thread data + inline void cleanup() + { + TLSDataContainer::cleanup(); + } + +protected: + /// Wrapper to allocate data by template + virtual void* createDataInstance() const CV_OVERRIDE { return new T; } + /// Wrapper to release data by template + virtual void deleteDataInstance(void* pData) const CV_OVERRIDE { delete (T*)pData; } +}; + + +/// TLS data accumulator with gathering methods +template +class TLSDataAccumulator : public TLSData +{ + mutable cv::Mutex mutex; + mutable std::vector dataFromTerminatedThreads; + std::vector detachedData; + bool cleanupMode; +public: + TLSDataAccumulator() : cleanupMode(false) {} + ~TLSDataAccumulator() + { + release(); + } + + /** @brief Get data from all threads + * @deprecated replaced by detachData() + * + * Lifetime of vector data is valid until next detachData()/cleanup()/release() calls + * + * @param[out] data result buffer (should be empty) + */ + void gather(std::vector &data) const + { + CV_Assert(cleanupMode == false); // state is not valid + CV_Assert(data.empty()); + { + std::vector &dataVoid = reinterpret_cast&>(data); + TLSDataContainer::gatherData(dataVoid); + } + { + AutoLock lock(mutex); + data.reserve(data.size() + dataFromTerminatedThreads.size()); + for (typename std::vector::const_iterator i = dataFromTerminatedThreads.begin(); i != dataFromTerminatedThreads.end(); ++i) + { + data.push_back((T*)*i); + } + } + } + + /** @brief Get and detach data from all threads + * + * Call cleanupDetachedData() when returned vector is not needed anymore. + * + * @return Vector with associated data. Content is preserved (including lifetime of attached data pointers) until next detachData()/cleanupDetachedData()/cleanup()/release() calls + */ + std::vector& detachData() + { + CV_Assert(cleanupMode == false); // state is not valid + std::vector dataVoid; + { + TLSDataContainer::detachData(dataVoid); + } + { + AutoLock lock(mutex); + detachedData.reserve(dataVoid.size() + dataFromTerminatedThreads.size()); + for (typename std::vector::const_iterator i = dataFromTerminatedThreads.begin(); i != dataFromTerminatedThreads.end(); ++i) + { + detachedData.push_back((T*)*i); + } + dataFromTerminatedThreads.clear(); + for (typename std::vector::const_iterator i = dataVoid.begin(); i != dataVoid.end(); ++i) + { + detachedData.push_back((T*)(void*)*i); + } + } + dataVoid.clear(); + return detachedData; + } + + /// Release associated thread data returned by detachData() call + void cleanupDetachedData() + { + AutoLock lock(mutex); + cleanupMode = true; + _cleanupDetachedData(); + cleanupMode = false; + } + + /// Release associated thread data + void cleanup() + { + cleanupMode = true; + TLSDataContainer::cleanup(); + + AutoLock lock(mutex); + _cleanupDetachedData(); + _cleanupTerminatedData(); + cleanupMode = false; + } + + /// Release associated thread data and free TLS key + void release() + { + cleanupMode = true; + TLSDataContainer::release(); + { + AutoLock lock(mutex); + _cleanupDetachedData(); + _cleanupTerminatedData(); + } + } + +protected: + // synchronized + void _cleanupDetachedData() + { + for (typename std::vector::iterator i = detachedData.begin(); i != detachedData.end(); ++i) + { + deleteDataInstance((T*)*i); + } + detachedData.clear(); + } + + // synchronized + void _cleanupTerminatedData() + { + for (typename std::vector::iterator i = dataFromTerminatedThreads.begin(); i != dataFromTerminatedThreads.end(); ++i) + { + deleteDataInstance((T*)*i); + } + dataFromTerminatedThreads.clear(); + } + +protected: + virtual void* createDataInstance() const CV_OVERRIDE + { + // Note: we can collect all allocated data here, but this would require raced mutex locks + return new T; + } + virtual void deleteDataInstance(void* pData) const CV_OVERRIDE + { + if (cleanupMode) + { + delete (T*)pData; + } + else + { + AutoLock lock(mutex); + dataFromTerminatedThreads.push_back((T*)pData); + } + } +}; + + +//! @} + +} // namespace + +#endif // OPENCV_UTILS_TLS_HPP diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/utils/trace.hpp b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/utils/trace.hpp new file mode 100644 index 0000000..ef5d35b --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/utils/trace.hpp @@ -0,0 +1,252 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#ifndef OPENCV_TRACE_HPP +#define OPENCV_TRACE_HPP + +#include + +namespace cv { +namespace utils { +namespace trace { + +//! @addtogroup core_logging +//! @{ + +//! Macro to trace function +#define CV_TRACE_FUNCTION() + +#define CV_TRACE_FUNCTION_SKIP_NESTED() + +//! Trace code scope. +//! @note Dynamic names are not supported in this macro (on stack or heap). Use string literals here only, like "initialize". +#define CV_TRACE_REGION(name_as_static_string_literal) +//! mark completed of the current opened region and create new one +//! @note Dynamic names are not supported in this macro (on stack or heap). Use string literals here only, like "step1". +#define CV_TRACE_REGION_NEXT(name_as_static_string_literal) + +//! Macro to trace argument value +#define CV_TRACE_ARG(arg_id) + +//! Macro to trace argument value (expanded version) +#define CV_TRACE_ARG_VALUE(arg_id, arg_name, value) + +//! @cond IGNORED +#define CV_TRACE_NS cv::utils::trace + +#if !defined(OPENCV_DISABLE_TRACE) && defined(__EMSCRIPTEN__) +#define OPENCV_DISABLE_TRACE 1 +#endif + +namespace details { + +#ifndef __OPENCV_TRACE +# if defined __OPENCV_BUILD && !defined __OPENCV_TESTS && !defined __OPENCV_APPS +# define __OPENCV_TRACE 1 +# else +# define __OPENCV_TRACE 0 +# endif +#endif + +#ifndef CV_TRACE_FILENAME +# define CV_TRACE_FILENAME __FILE__ +#endif + +#ifndef CV__TRACE_FUNCTION +# if defined _MSC_VER +# define CV__TRACE_FUNCTION __FUNCSIG__ +# elif defined __GNUC__ +# define CV__TRACE_FUNCTION __PRETTY_FUNCTION__ +# else +# define CV__TRACE_FUNCTION "" +# endif +#endif + +//! Thread-local instance (usually allocated on stack) +class CV_EXPORTS Region +{ +public: + struct LocationExtraData; + struct LocationStaticStorage + { + LocationExtraData** ppExtra; //< implementation specific data + const char* name; //< region name (function name or other custom name) + const char* filename; //< source code filename + int line; //< source code line + int flags; //< flags (implementation code path: Plain, IPP, OpenCL) + }; + + Region(const LocationStaticStorage& location); + inline ~Region() + { + if (implFlags != 0) + destroy(); + CV_DbgAssert(implFlags == 0); + CV_DbgAssert(pImpl == NULL); + } + + class Impl; + Impl* pImpl; // NULL if current region is not active + int implFlags; // see RegionFlag, 0 if region is ignored + + bool isActive() const { return pImpl != NULL; } + + void destroy(); +private: + Region(const Region&); // disabled + Region& operator= (const Region&); // disabled +}; + +//! Specify region flags +enum RegionLocationFlag { + REGION_FLAG_FUNCTION = (1 << 0), //< region is function (=1) / nested named region (=0) + REGION_FLAG_APP_CODE = (1 << 1), //< region is Application code (=1) / OpenCV library code (=0) + REGION_FLAG_SKIP_NESTED = (1 << 2), //< avoid processing of nested regions + + REGION_FLAG_IMPL_IPP = (1 << 16), //< region is part of IPP code path + REGION_FLAG_IMPL_OPENCL = (2 << 16), //< region is part of OpenCL code path + REGION_FLAG_IMPL_OPENVX = (3 << 16), //< region is part of OpenVX code path + + REGION_FLAG_IMPL_MASK = (15 << 16), + + REGION_FLAG_REGION_FORCE = (1 << 30), + REGION_FLAG_REGION_NEXT = (1 << 31), //< close previous region (see #CV_TRACE_REGION_NEXT macro) + + ENUM_REGION_FLAG_FORCE_INT = INT_MAX +}; + +struct CV_EXPORTS TraceArg { +public: + struct ExtraData; + ExtraData** ppExtra; + const char* name; + int flags; +}; +/** @brief Add meta information to current region (function) + * See CV_TRACE_ARG macro + * @param arg argument information structure (global static cache) + * @param value argument value (can by dynamic string literal in case of string, static allocation is not required) + */ +CV_EXPORTS void traceArg(const TraceArg& arg, const char* value); +//! @overload +CV_EXPORTS void traceArg(const TraceArg& arg, int value); +//! @overload +CV_EXPORTS void traceArg(const TraceArg& arg, int64 value); +//! @overload +CV_EXPORTS void traceArg(const TraceArg& arg, double value); + +#define CV__TRACE_LOCATION_VARNAME(loc_id) CVAUX_CONCAT(CVAUX_CONCAT(__cv_trace_location_, loc_id), __LINE__) +#define CV__TRACE_LOCATION_EXTRA_VARNAME(loc_id) CVAUX_CONCAT(CVAUX_CONCAT(__cv_trace_location_extra_, loc_id) , __LINE__) + +#define CV__TRACE_DEFINE_LOCATION_(loc_id, name, flags) \ + static CV_TRACE_NS::details::Region::LocationExtraData* CV__TRACE_LOCATION_EXTRA_VARNAME(loc_id) = 0; \ + static const CV_TRACE_NS::details::Region::LocationStaticStorage \ + CV__TRACE_LOCATION_VARNAME(loc_id) = { &(CV__TRACE_LOCATION_EXTRA_VARNAME(loc_id)), name, CV_TRACE_FILENAME, __LINE__, flags}; + +#define CV__TRACE_DEFINE_LOCATION_FN(name, flags) CV__TRACE_DEFINE_LOCATION_(fn, name, ((flags) | CV_TRACE_NS::details::REGION_FLAG_FUNCTION)) + + +#define CV__TRACE_OPENCV_FUNCTION() \ + CV__TRACE_DEFINE_LOCATION_FN(CV__TRACE_FUNCTION, 0); \ + const CV_TRACE_NS::details::Region __region_fn(CV__TRACE_LOCATION_VARNAME(fn)); + +#define CV__TRACE_OPENCV_FUNCTION_NAME(name) \ + CV__TRACE_DEFINE_LOCATION_FN(name, 0); \ + const CV_TRACE_NS::details::Region __region_fn(CV__TRACE_LOCATION_VARNAME(fn)); + +#define CV__TRACE_APP_FUNCTION() \ + CV__TRACE_DEFINE_LOCATION_FN(CV__TRACE_FUNCTION, CV_TRACE_NS::details::REGION_FLAG_APP_CODE); \ + const CV_TRACE_NS::details::Region __region_fn(CV__TRACE_LOCATION_VARNAME(fn)); + +#define CV__TRACE_APP_FUNCTION_NAME(name) \ + CV__TRACE_DEFINE_LOCATION_FN(name, CV_TRACE_NS::details::REGION_FLAG_APP_CODE); \ + const CV_TRACE_NS::details::Region __region_fn(CV__TRACE_LOCATION_VARNAME(fn)); + + +#define CV__TRACE_OPENCV_FUNCTION_SKIP_NESTED() \ + CV__TRACE_DEFINE_LOCATION_FN(CV__TRACE_FUNCTION, CV_TRACE_NS::details::REGION_FLAG_SKIP_NESTED); \ + const CV_TRACE_NS::details::Region __region_fn(CV__TRACE_LOCATION_VARNAME(fn)); + +#define CV__TRACE_OPENCV_FUNCTION_NAME_SKIP_NESTED(name) \ + CV__TRACE_DEFINE_LOCATION_FN(name, CV_TRACE_NS::details::REGION_FLAG_SKIP_NESTED); \ + const CV_TRACE_NS::details::Region __region_fn(CV__TRACE_LOCATION_VARNAME(fn)); + +#define CV__TRACE_APP_FUNCTION_SKIP_NESTED() \ + CV__TRACE_DEFINE_LOCATION_FN(CV__TRACE_FUNCTION, CV_TRACE_NS::details::REGION_FLAG_SKIP_NESTED | CV_TRACE_NS::details::REGION_FLAG_APP_CODE); \ + const CV_TRACE_NS::details::Region __region_fn(CV__TRACE_LOCATION_VARNAME(fn)); + + +#define CV__TRACE_REGION_(name_as_static_string_literal, flags) \ + CV__TRACE_DEFINE_LOCATION_(region, name_as_static_string_literal, flags); \ + CV_TRACE_NS::details::Region CVAUX_CONCAT(__region_, __LINE__)(CV__TRACE_LOCATION_VARNAME(region)); + +#define CV__TRACE_REGION(name_as_static_string_literal) CV__TRACE_REGION_(name_as_static_string_literal, 0) +#define CV__TRACE_REGION_NEXT(name_as_static_string_literal) CV__TRACE_REGION_(name_as_static_string_literal, CV_TRACE_NS::details::REGION_FLAG_REGION_NEXT) + +#define CV__TRACE_ARG_VARNAME(arg_id) CVAUX_CONCAT(__cv_trace_arg_ ## arg_id, __LINE__) +#define CV__TRACE_ARG_EXTRA_VARNAME(arg_id) CVAUX_CONCAT(__cv_trace_arg_extra_ ## arg_id, __LINE__) + +#define CV__TRACE_DEFINE_ARG_(arg_id, name, flags) \ + static CV_TRACE_NS::details::TraceArg::ExtraData* CV__TRACE_ARG_EXTRA_VARNAME(arg_id) = 0; \ + static const CV_TRACE_NS::details::TraceArg \ + CV__TRACE_ARG_VARNAME(arg_id) = { &(CV__TRACE_ARG_EXTRA_VARNAME(arg_id)), name, flags }; + +#define CV__TRACE_ARG_VALUE(arg_id, arg_name, value) \ + CV__TRACE_DEFINE_ARG_(arg_id, arg_name, 0); \ + CV_TRACE_NS::details::traceArg((CV__TRACE_ARG_VARNAME(arg_id)), value); + +#define CV__TRACE_ARG(arg_id) CV_TRACE_ARG_VALUE(arg_id, #arg_id, (arg_id)) + +} // namespace + +#ifndef OPENCV_DISABLE_TRACE +#undef CV_TRACE_FUNCTION +#undef CV_TRACE_FUNCTION_SKIP_NESTED +#if __OPENCV_TRACE +#define CV_TRACE_FUNCTION CV__TRACE_OPENCV_FUNCTION +#define CV_TRACE_FUNCTION_SKIP_NESTED CV__TRACE_OPENCV_FUNCTION_SKIP_NESTED +#else +#define CV_TRACE_FUNCTION CV__TRACE_APP_FUNCTION +#define CV_TRACE_FUNCTION_SKIP_NESTED CV__TRACE_APP_FUNCTION_SKIP_NESTED +#endif + +#undef CV_TRACE_REGION +#define CV_TRACE_REGION CV__TRACE_REGION + +#undef CV_TRACE_REGION_NEXT +#define CV_TRACE_REGION_NEXT CV__TRACE_REGION_NEXT + +#undef CV_TRACE_ARG_VALUE +#define CV_TRACE_ARG_VALUE(arg_id, arg_name, value) \ + if (__region_fn.isActive()) \ + { \ + CV__TRACE_ARG_VALUE(arg_id, arg_name, value); \ + } + +#undef CV_TRACE_ARG +#define CV_TRACE_ARG CV__TRACE_ARG + +#endif // OPENCV_DISABLE_TRACE + +#ifdef OPENCV_TRACE_VERBOSE +#define CV_TRACE_FUNCTION_VERBOSE CV_TRACE_FUNCTION +#define CV_TRACE_REGION_VERBOSE CV_TRACE_REGION +#define CV_TRACE_REGION_NEXT_VERBOSE CV_TRACE_REGION_NEXT +#define CV_TRACE_ARG_VALUE_VERBOSE CV_TRACE_ARG_VALUE +#define CV_TRACE_ARG_VERBOSE CV_TRACE_ARG +#else +#define CV_TRACE_FUNCTION_VERBOSE(...) +#define CV_TRACE_REGION_VERBOSE(...) +#define CV_TRACE_REGION_NEXT_VERBOSE(...) +#define CV_TRACE_ARG_VALUE_VERBOSE(...) +#define CV_TRACE_ARG_VERBOSE(...) +#endif + +//! @endcond + +//! @} + +}}} // namespace + +#endif // OPENCV_TRACE_HPP diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/va_intel.hpp b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/va_intel.hpp new file mode 100644 index 0000000..b37ce75 --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/va_intel.hpp @@ -0,0 +1,75 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +// Copyright (C) 2015, Itseez, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. + +#ifndef OPENCV_CORE_VA_INTEL_HPP +#define OPENCV_CORE_VA_INTEL_HPP + +#ifndef __cplusplus +# error va_intel.hpp header must be compiled as C++ +#endif + +#include "opencv2/core.hpp" +#include "ocl.hpp" + +#if defined(HAVE_VA) +# include "va/va.h" +#else // HAVE_VA +# if !defined(_VA_H_) + typedef void* VADisplay; + typedef unsigned int VASurfaceID; +# endif // !_VA_H_ +#endif // HAVE_VA + +namespace cv { namespace va_intel { + +/** @addtogroup core_va_intel +This section describes Intel VA-API/OpenCL (CL-VA) interoperability. + +To enable basic VA interoperability build OpenCV with libva library integration enabled: `-DWITH_VA=ON` (corresponding dev package should be installed). + +To enable advanced CL-VA interoperability support on Intel HW, enable option: `-DWITH_VA_INTEL=ON` (OpenCL integration should be enabled which is the default setting). Special runtime environment should be set up in order to use this feature: correct combination of [libva](https://github.com/intel/libva), [OpenCL runtime](https://github.com/intel/compute-runtime) and [media driver](https://github.com/intel/media-driver) should be installed. + +Check usage example for details: samples/va_intel/va_intel_interop.cpp +*/ +//! @{ + +/////////////////// CL-VA Interoperability Functions /////////////////// + +namespace ocl { +using namespace cv::ocl; + +// TODO static functions in the Context class +/** @brief Creates OpenCL context from VA. +@param display - VADisplay for which CL interop should be established. +@param tryInterop - try to set up for interoperability, if true; set up for use slow copy if false. +@return Returns reference to OpenCL Context + */ +CV_EXPORTS Context& initializeContextFromVA(VADisplay display, bool tryInterop = true); + +} // namespace cv::va_intel::ocl + +/** @brief Converts InputArray to VASurfaceID object. +@param display - VADisplay object. +@param src - source InputArray. +@param surface - destination VASurfaceID object. +@param size - size of image represented by VASurfaceID object. + */ +CV_EXPORTS void convertToVASurface(VADisplay display, InputArray src, VASurfaceID surface, Size size); + +/** @brief Converts VASurfaceID object to OutputArray. +@param display - VADisplay object. +@param surface - source VASurfaceID object. +@param size - size of image represented by VASurfaceID object. +@param dst - destination OutputArray. + */ +CV_EXPORTS void convertFromVASurface(VADisplay display, VASurfaceID surface, Size size, OutputArray dst); + +//! @} + +}} // namespace cv::va_intel + +#endif /* OPENCV_CORE_VA_INTEL_HPP */ diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/version.hpp b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/version.hpp new file mode 100644 index 0000000..6112799 --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/version.hpp @@ -0,0 +1,26 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#ifndef OPENCV_VERSION_HPP +#define OPENCV_VERSION_HPP + +#define CV_VERSION_MAJOR 4 +#define CV_VERSION_MINOR 5 +#define CV_VERSION_REVISION 1 +#define CV_VERSION_STATUS "" + +#define CVAUX_STR_EXP(__A) #__A +#define CVAUX_STR(__A) CVAUX_STR_EXP(__A) + +#define CVAUX_STRW_EXP(__A) L ## #__A +#define CVAUX_STRW(__A) CVAUX_STRW_EXP(__A) + +#define CV_VERSION CVAUX_STR(CV_VERSION_MAJOR) "." CVAUX_STR(CV_VERSION_MINOR) "." CVAUX_STR(CV_VERSION_REVISION) CV_VERSION_STATUS + +/* old style version constants*/ +#define CV_MAJOR_VERSION CV_VERSION_MAJOR +#define CV_MINOR_VERSION CV_VERSION_MINOR +#define CV_SUBMINOR_VERSION CV_VERSION_REVISION + +#endif // OPENCV_VERSION_HPP diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/vsx_utils.hpp b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/vsx_utils.hpp new file mode 100644 index 0000000..d796251 --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/core/vsx_utils.hpp @@ -0,0 +1,1040 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html + +#ifndef OPENCV_HAL_VSX_UTILS_HPP +#define OPENCV_HAL_VSX_UTILS_HPP + +#include "opencv2/core/cvdef.h" + +#ifndef SKIP_INCLUDES +# include +#endif + +//! @addtogroup core_utils_vsx +//! @{ +#if CV_VSX + +#define __VSX_S16__(c, v) (c){v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v} +#define __VSX_S8__(c, v) (c){v, v, v, v, v, v, v, v} +#define __VSX_S4__(c, v) (c){v, v, v, v} +#define __VSX_S2__(c, v) (c){v, v} + +typedef __vector unsigned char vec_uchar16; +#define vec_uchar16_set(...) (vec_uchar16){__VA_ARGS__} +#define vec_uchar16_sp(c) (__VSX_S16__(vec_uchar16, (unsigned char)c)) +#define vec_uchar16_c(v) ((vec_uchar16)(v)) +#define vec_uchar16_z vec_uchar16_sp(0) + +typedef __vector signed char vec_char16; +#define vec_char16_set(...) (vec_char16){__VA_ARGS__} +#define vec_char16_sp(c) (__VSX_S16__(vec_char16, (signed char)c)) +#define vec_char16_c(v) ((vec_char16)(v)) +#define vec_char16_z vec_char16_sp(0) + +typedef __vector unsigned short vec_ushort8; +#define vec_ushort8_set(...) (vec_ushort8){__VA_ARGS__} +#define vec_ushort8_sp(c) (__VSX_S8__(vec_ushort8, (unsigned short)c)) +#define vec_ushort8_c(v) ((vec_ushort8)(v)) +#define vec_ushort8_z vec_ushort8_sp(0) + +typedef __vector signed short vec_short8; +#define vec_short8_set(...) (vec_short8){__VA_ARGS__} +#define vec_short8_sp(c) (__VSX_S8__(vec_short8, (signed short)c)) +#define vec_short8_c(v) ((vec_short8)(v)) +#define vec_short8_z vec_short8_sp(0) + +typedef __vector unsigned int vec_uint4; +#define vec_uint4_set(...) (vec_uint4){__VA_ARGS__} +#define vec_uint4_sp(c) (__VSX_S4__(vec_uint4, (unsigned int)c)) +#define vec_uint4_c(v) ((vec_uint4)(v)) +#define vec_uint4_z vec_uint4_sp(0) + +typedef __vector signed int vec_int4; +#define vec_int4_set(...) (vec_int4){__VA_ARGS__} +#define vec_int4_sp(c) (__VSX_S4__(vec_int4, (signed int)c)) +#define vec_int4_c(v) ((vec_int4)(v)) +#define vec_int4_z vec_int4_sp(0) + +typedef __vector float vec_float4; +#define vec_float4_set(...) (vec_float4){__VA_ARGS__} +#define vec_float4_sp(c) (__VSX_S4__(vec_float4, c)) +#define vec_float4_c(v) ((vec_float4)(v)) +#define vec_float4_z vec_float4_sp(0) + +typedef __vector unsigned long long vec_udword2; +#define vec_udword2_set(...) (vec_udword2){__VA_ARGS__} +#define vec_udword2_sp(c) (__VSX_S2__(vec_udword2, (unsigned long long)c)) +#define vec_udword2_c(v) ((vec_udword2)(v)) +#define vec_udword2_z vec_udword2_sp(0) + +typedef __vector signed long long vec_dword2; +#define vec_dword2_set(...) (vec_dword2){__VA_ARGS__} +#define vec_dword2_sp(c) (__VSX_S2__(vec_dword2, (signed long long)c)) +#define vec_dword2_c(v) ((vec_dword2)(v)) +#define vec_dword2_z vec_dword2_sp(0) + +typedef __vector double vec_double2; +#define vec_double2_set(...) (vec_double2){__VA_ARGS__} +#define vec_double2_c(v) ((vec_double2)(v)) +#define vec_double2_sp(c) (__VSX_S2__(vec_double2, c)) +#define vec_double2_z vec_double2_sp(0) + +#define vec_bchar16 __vector __bool char +#define vec_bchar16_set(...) (vec_bchar16){__VA_ARGS__} +#define vec_bchar16_c(v) ((vec_bchar16)(v)) + +#define vec_bshort8 __vector __bool short +#define vec_bshort8_set(...) (vec_bshort8){__VA_ARGS__} +#define vec_bshort8_c(v) ((vec_bshort8)(v)) + +#define vec_bint4 __vector __bool int +#define vec_bint4_set(...) (vec_bint4){__VA_ARGS__} +#define vec_bint4_c(v) ((vec_bint4)(v)) + +#define vec_bdword2 __vector __bool long long +#define vec_bdword2_set(...) (vec_bdword2){__VA_ARGS__} +#define vec_bdword2_c(v) ((vec_bdword2)(v)) + +#define VSX_FINLINE(tp) extern inline tp __attribute__((always_inline)) + +#define VSX_REDIRECT_1RG(rt, rg, fnm, fn2) \ +VSX_FINLINE(rt) fnm(const rg& a) { return fn2(a); } + +#define VSX_REDIRECT_2RG(rt, rg, fnm, fn2) \ +VSX_FINLINE(rt) fnm(const rg& a, const rg& b) { return fn2(a, b); } + +/* + * GCC VSX compatibility +**/ +#if defined(__GNUG__) && !defined(__clang__) + +// inline asm helper +#define VSX_IMPL_1RG(rt, rg, opc, fnm) \ +VSX_FINLINE(rt) fnm(const rg& a) \ +{ rt rs; __asm__ __volatile__(#opc" %x0,%x1" : "=wa" (rs) : "wa" (a)); return rs; } + +#define VSX_IMPL_1VRG(rt, rg, opc, fnm) \ +VSX_FINLINE(rt) fnm(const rg& a) \ +{ rt rs; __asm__ __volatile__(#opc" %0,%1" : "=v" (rs) : "v" (a)); return rs; } + +#define VSX_IMPL_2VRG_F(rt, rg, fopc, fnm) \ +VSX_FINLINE(rt) fnm(const rg& a, const rg& b) \ +{ rt rs; __asm__ __volatile__(fopc : "=v" (rs) : "v" (a), "v" (b)); return rs; } + +#define VSX_IMPL_2VRG(rt, rg, opc, fnm) VSX_IMPL_2VRG_F(rt, rg, #opc" %0,%1,%2", fnm) + +#if __GNUG__ < 8 + + // Support for int4 -> dword2 expanding multiply was added in GCC 8. + #ifdef vec_mule + #undef vec_mule + #endif + #ifdef vec_mulo + #undef vec_mulo + #endif + + VSX_REDIRECT_2RG(vec_ushort8, vec_uchar16, vec_mule, __builtin_vec_mule) + VSX_REDIRECT_2RG(vec_short8, vec_char16, vec_mule, __builtin_vec_mule) + VSX_REDIRECT_2RG(vec_int4, vec_short8, vec_mule, __builtin_vec_mule) + VSX_REDIRECT_2RG(vec_uint4, vec_ushort8, vec_mule, __builtin_vec_mule) + VSX_REDIRECT_2RG(vec_ushort8, vec_uchar16, vec_mulo, __builtin_vec_mulo) + VSX_REDIRECT_2RG(vec_short8, vec_char16, vec_mulo, __builtin_vec_mulo) + VSX_REDIRECT_2RG(vec_int4, vec_short8, vec_mulo, __builtin_vec_mulo) + VSX_REDIRECT_2RG(vec_uint4, vec_ushort8, vec_mulo, __builtin_vec_mulo) + + // dword2 support arrived in ISA 2.07 and GCC 8+ + VSX_IMPL_2VRG(vec_dword2, vec_int4, vmulosw, vec_mule) + VSX_IMPL_2VRG(vec_udword2, vec_uint4, vmulouw, vec_mule) + VSX_IMPL_2VRG(vec_dword2, vec_int4, vmulesw, vec_mulo) + VSX_IMPL_2VRG(vec_udword2, vec_uint4, vmuleuw, vec_mulo) + +#endif + +#if __GNUG__ < 7 +// up to GCC 6 vec_mul only supports precisions and llong +# ifdef vec_mul +# undef vec_mul +# endif +/* + * there's no a direct instruction for supporting 8-bit, 16-bit multiplication in ISA 2.07, + * XLC Implement it by using instruction "multiply even", "multiply odd" and "permute" +**/ +# define VSX_IMPL_MULH(Tvec, cperm) \ + VSX_FINLINE(Tvec) vec_mul(const Tvec& a, const Tvec& b) \ + { \ + static const vec_uchar16 ev_od = {cperm}; \ + return vec_perm((Tvec)vec_mule(a, b), (Tvec)vec_mulo(a, b), ev_od); \ + } + #define VSX_IMPL_MULH_P16 0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30 + VSX_IMPL_MULH(vec_char16, VSX_IMPL_MULH_P16) + VSX_IMPL_MULH(vec_uchar16, VSX_IMPL_MULH_P16) + #define VSX_IMPL_MULH_P8 0, 1, 16, 17, 4, 5, 20, 21, 8, 9, 24, 25, 12, 13, 28, 29 + VSX_IMPL_MULH(vec_short8, VSX_IMPL_MULH_P8) + VSX_IMPL_MULH(vec_ushort8, VSX_IMPL_MULH_P8) + // vmuluwm can be used for unsigned or signed integers, that's what they said + VSX_IMPL_2VRG(vec_int4, vec_int4, vmuluwm, vec_mul) + VSX_IMPL_2VRG(vec_uint4, vec_uint4, vmuluwm, vec_mul) + // redirect to GCC builtin vec_mul, since it already supports precisions and llong + VSX_REDIRECT_2RG(vec_float4, vec_float4, vec_mul, __builtin_vec_mul) + VSX_REDIRECT_2RG(vec_double2, vec_double2, vec_mul, __builtin_vec_mul) + VSX_REDIRECT_2RG(vec_dword2, vec_dword2, vec_mul, __builtin_vec_mul) + VSX_REDIRECT_2RG(vec_udword2, vec_udword2, vec_mul, __builtin_vec_mul) +#endif // __GNUG__ < 7 + +#if __GNUG__ < 6 +/* + * Instruction "compare greater than or equal" in ISA 2.07 only supports single + * and double precision. + * In XLC and new versions of GCC implement integers by using instruction "greater than" and NOR. +**/ +# ifdef vec_cmpge +# undef vec_cmpge +# endif +# ifdef vec_cmple +# undef vec_cmple +# endif +# define vec_cmple(a, b) vec_cmpge(b, a) +# define VSX_IMPL_CMPGE(rt, rg, opc, fnm) \ + VSX_IMPL_2VRG_F(rt, rg, #opc" %0,%2,%1\n\t xxlnor %x0,%x0,%x0", fnm) + + VSX_IMPL_CMPGE(vec_bchar16, vec_char16, vcmpgtsb, vec_cmpge) + VSX_IMPL_CMPGE(vec_bchar16, vec_uchar16, vcmpgtub, vec_cmpge) + VSX_IMPL_CMPGE(vec_bshort8, vec_short8, vcmpgtsh, vec_cmpge) + VSX_IMPL_CMPGE(vec_bshort8, vec_ushort8, vcmpgtuh, vec_cmpge) + VSX_IMPL_CMPGE(vec_bint4, vec_int4, vcmpgtsw, vec_cmpge) + VSX_IMPL_CMPGE(vec_bint4, vec_uint4, vcmpgtuw, vec_cmpge) + VSX_IMPL_CMPGE(vec_bdword2, vec_dword2, vcmpgtsd, vec_cmpge) + VSX_IMPL_CMPGE(vec_bdword2, vec_udword2, vcmpgtud, vec_cmpge) + +// redirect to GCC builtin cmpge, since it already supports precisions + VSX_REDIRECT_2RG(vec_bint4, vec_float4, vec_cmpge, __builtin_vec_cmpge) + VSX_REDIRECT_2RG(vec_bdword2, vec_double2, vec_cmpge, __builtin_vec_cmpge) + +// up to gcc5 vec_nor doesn't support bool long long +# undef vec_nor + template + VSX_REDIRECT_2RG(T, T, vec_nor, __builtin_vec_nor) + + VSX_FINLINE(vec_bdword2) vec_nor(const vec_bdword2& a, const vec_bdword2& b) + { return vec_bdword2_c(__builtin_vec_nor(vec_dword2_c(a), vec_dword2_c(b))); } + +// vec_packs doesn't support double words in gcc4 and old versions of gcc5 +# undef vec_packs + VSX_REDIRECT_2RG(vec_char16, vec_short8, vec_packs, __builtin_vec_packs) + VSX_REDIRECT_2RG(vec_uchar16, vec_ushort8, vec_packs, __builtin_vec_packs) + VSX_REDIRECT_2RG(vec_short8, vec_int4, vec_packs, __builtin_vec_packs) + VSX_REDIRECT_2RG(vec_ushort8, vec_uint4, vec_packs, __builtin_vec_packs) + + VSX_IMPL_2VRG_F(vec_int4, vec_dword2, "vpksdss %0,%2,%1", vec_packs) + VSX_IMPL_2VRG_F(vec_uint4, vec_udword2, "vpkudus %0,%2,%1", vec_packs) +#endif // __GNUG__ < 6 + +#if __GNUG__ < 5 +// vec_xxpermdi in gcc4 missing little-endian supports just like clang +# define vec_permi(a, b, c) vec_xxpermdi(b, a, (3 ^ (((c) & 1) << 1 | (c) >> 1))) +// same as vec_xxpermdi +# undef vec_vbpermq + VSX_IMPL_2VRG(vec_udword2, vec_uchar16, vbpermq, vec_vbpermq) + VSX_IMPL_2VRG(vec_dword2, vec_char16, vbpermq, vec_vbpermq) +#else +# define vec_permi vec_xxpermdi +#endif // __GNUG__ < 5 + +// shift left double by word immediate +#ifndef vec_sldw +# define vec_sldw __builtin_vsx_xxsldwi +#endif + +// vector population count +VSX_IMPL_1VRG(vec_uchar16, vec_uchar16, vpopcntb, vec_popcntu) +VSX_IMPL_1VRG(vec_uchar16, vec_char16, vpopcntb, vec_popcntu) +VSX_IMPL_1VRG(vec_ushort8, vec_ushort8, vpopcnth, vec_popcntu) +VSX_IMPL_1VRG(vec_ushort8, vec_short8, vpopcnth, vec_popcntu) +VSX_IMPL_1VRG(vec_uint4, vec_uint4, vpopcntw, vec_popcntu) +VSX_IMPL_1VRG(vec_uint4, vec_int4, vpopcntw, vec_popcntu) +VSX_IMPL_1VRG(vec_udword2, vec_udword2, vpopcntd, vec_popcntu) +VSX_IMPL_1VRG(vec_udword2, vec_dword2, vpopcntd, vec_popcntu) + +// converts between single and double-precision +VSX_REDIRECT_1RG(vec_float4, vec_double2, vec_cvfo, __builtin_vsx_xvcvdpsp) +VSX_REDIRECT_1RG(vec_double2, vec_float4, vec_cvfo, __builtin_vsx_xvcvspdp) + +// converts word and doubleword to double-precision +#undef vec_ctd +VSX_IMPL_1RG(vec_double2, vec_int4, xvcvsxwdp, vec_ctdo) +VSX_IMPL_1RG(vec_double2, vec_uint4, xvcvuxwdp, vec_ctdo) +VSX_IMPL_1RG(vec_double2, vec_dword2, xvcvsxddp, vec_ctd) +VSX_IMPL_1RG(vec_double2, vec_udword2, xvcvuxddp, vec_ctd) + +// converts word and doubleword to single-precision +#undef vec_ctf +VSX_IMPL_1RG(vec_float4, vec_int4, xvcvsxwsp, vec_ctf) +VSX_IMPL_1RG(vec_float4, vec_uint4, xvcvuxwsp, vec_ctf) +VSX_IMPL_1RG(vec_float4, vec_dword2, xvcvsxdsp, vec_ctfo) +VSX_IMPL_1RG(vec_float4, vec_udword2, xvcvuxdsp, vec_ctfo) + +// converts single and double precision to signed word +#undef vec_cts +VSX_IMPL_1RG(vec_int4, vec_double2, xvcvdpsxws, vec_ctso) +VSX_IMPL_1RG(vec_int4, vec_float4, xvcvspsxws, vec_cts) + +// converts single and double precision to unsigned word +#undef vec_ctu +VSX_IMPL_1RG(vec_uint4, vec_double2, xvcvdpuxws, vec_ctuo) +VSX_IMPL_1RG(vec_uint4, vec_float4, xvcvspuxws, vec_ctu) + +// converts single and double precision to signed doubleword +#undef vec_ctsl +VSX_IMPL_1RG(vec_dword2, vec_double2, xvcvdpsxds, vec_ctsl) +VSX_IMPL_1RG(vec_dword2, vec_float4, xvcvspsxds, vec_ctslo) + +// converts single and double precision to unsigned doubleword +#undef vec_ctul +VSX_IMPL_1RG(vec_udword2, vec_double2, xvcvdpuxds, vec_ctul) +VSX_IMPL_1RG(vec_udword2, vec_float4, xvcvspuxds, vec_ctulo) + +// just in case if GCC doesn't define it +#ifndef vec_xl +# define vec_xl vec_vsx_ld +# define vec_xst vec_vsx_st +#endif + +#endif // GCC VSX compatibility + +/* + * CLANG VSX compatibility +**/ +#if defined(__clang__) && !defined(__IBMCPP__) + +/* + * CLANG doesn't support %x in the inline asm template which fixes register number + * when using any of the register constraints wa, wd, wf + * + * For more explanation checkout PowerPC and IBM RS6000 in https://gcc.gnu.org/onlinedocs/gcc/Machine-Constraints.html + * Also there's already an open bug https://bugs.llvm.org/show_bug.cgi?id=31837 + * + * So we're not able to use inline asm and only use built-in functions that CLANG supports + * and use __builtin_convertvector if clang missing any of vector conversions built-in functions + * + * todo: clang asm template bug is fixed, need to reconsider the current workarounds. +*/ + +// convert vector helper +#define VSX_IMPL_CONVERT(rt, rg, fnm) \ +VSX_FINLINE(rt) fnm(const rg& a) { return __builtin_convertvector(a, rt); } + +#if __clang_major__ < 5 +// implement vec_permi in a dirty way +# define VSX_IMPL_CLANG_4_PERMI(Tvec) \ + VSX_FINLINE(Tvec) vec_permi(const Tvec& a, const Tvec& b, unsigned const char c) \ + { \ + switch (c) \ + { \ + case 0: \ + return vec_mergeh(a, b); \ + case 1: \ + return vec_mergel(vec_mergeh(a, a), b); \ + case 2: \ + return vec_mergeh(vec_mergel(a, a), b); \ + default: \ + return vec_mergel(a, b); \ + } \ + } + VSX_IMPL_CLANG_4_PERMI(vec_udword2) + VSX_IMPL_CLANG_4_PERMI(vec_dword2) + VSX_IMPL_CLANG_4_PERMI(vec_double2) + +// vec_xxsldwi is missing in clang 4 +# define vec_xxsldwi(a, b, c) vec_sld(a, b, (c) * 4) +#else +// vec_xxpermdi is missing little-endian supports in clang 4 just like gcc4 +# define vec_permi(a, b, c) vec_xxpermdi(b, a, (3 ^ (((c) & 1) << 1 | (c) >> 1))) +#endif // __clang_major__ < 5 + +// shift left double by word immediate +#ifndef vec_sldw +# define vec_sldw vec_xxsldwi +#endif + +// Implement vec_rsqrt since clang only supports vec_rsqrte +#ifndef vec_rsqrt + VSX_FINLINE(vec_float4) vec_rsqrt(const vec_float4& a) + { return vec_div(vec_float4_sp(1), vec_sqrt(a)); } + + VSX_FINLINE(vec_double2) vec_rsqrt(const vec_double2& a) + { return vec_div(vec_double2_sp(1), vec_sqrt(a)); } +#endif + +// vec_promote missing support for doubleword +VSX_FINLINE(vec_dword2) vec_promote(long long a, int b) +{ + vec_dword2 ret = vec_dword2_z; + ret[b & 1] = a; + return ret; +} + +VSX_FINLINE(vec_udword2) vec_promote(unsigned long long a, int b) +{ + vec_udword2 ret = vec_udword2_z; + ret[b & 1] = a; + return ret; +} + +// vec_popcnt should return unsigned but clang has different thought just like gcc in vec_vpopcnt +#define VSX_IMPL_POPCNTU(Tvec, Tvec2, ucast) \ +VSX_FINLINE(Tvec) vec_popcntu(const Tvec2& a) \ +{ return ucast(vec_popcnt(a)); } +VSX_IMPL_POPCNTU(vec_uchar16, vec_char16, vec_uchar16_c); +VSX_IMPL_POPCNTU(vec_ushort8, vec_short8, vec_ushort8_c); +VSX_IMPL_POPCNTU(vec_uint4, vec_int4, vec_uint4_c); +VSX_IMPL_POPCNTU(vec_udword2, vec_dword2, vec_udword2_c); +// redirect unsigned types +VSX_REDIRECT_1RG(vec_uchar16, vec_uchar16, vec_popcntu, vec_popcnt) +VSX_REDIRECT_1RG(vec_ushort8, vec_ushort8, vec_popcntu, vec_popcnt) +VSX_REDIRECT_1RG(vec_uint4, vec_uint4, vec_popcntu, vec_popcnt) +VSX_REDIRECT_1RG(vec_udword2, vec_udword2, vec_popcntu, vec_popcnt) + +// converts between single and double precision +VSX_REDIRECT_1RG(vec_float4, vec_double2, vec_cvfo, __builtin_vsx_xvcvdpsp) +VSX_REDIRECT_1RG(vec_double2, vec_float4, vec_cvfo, __builtin_vsx_xvcvspdp) + +// converts word and doubleword to double-precision +#ifdef vec_ctd +# undef vec_ctd +#endif +VSX_REDIRECT_1RG(vec_double2, vec_int4, vec_ctdo, __builtin_vsx_xvcvsxwdp) +VSX_REDIRECT_1RG(vec_double2, vec_uint4, vec_ctdo, __builtin_vsx_xvcvuxwdp) + +VSX_IMPL_CONVERT(vec_double2, vec_dword2, vec_ctd) +VSX_IMPL_CONVERT(vec_double2, vec_udword2, vec_ctd) + +// converts word and doubleword to single-precision +#if __clang_major__ > 4 +# undef vec_ctf +#endif +VSX_IMPL_CONVERT(vec_float4, vec_int4, vec_ctf) +VSX_IMPL_CONVERT(vec_float4, vec_uint4, vec_ctf) +VSX_REDIRECT_1RG(vec_float4, vec_dword2, vec_ctfo, __builtin_vsx_xvcvsxdsp) +VSX_REDIRECT_1RG(vec_float4, vec_udword2, vec_ctfo, __builtin_vsx_xvcvuxdsp) + +// converts single and double precision to signed word +#if __clang_major__ > 4 +# undef vec_cts +#endif +VSX_REDIRECT_1RG(vec_int4, vec_double2, vec_ctso, __builtin_vsx_xvcvdpsxws) +VSX_IMPL_CONVERT(vec_int4, vec_float4, vec_cts) + +// converts single and double precision to unsigned word +#if __clang_major__ > 4 +# undef vec_ctu +#endif +VSX_REDIRECT_1RG(vec_uint4, vec_double2, vec_ctuo, __builtin_vsx_xvcvdpuxws) +VSX_IMPL_CONVERT(vec_uint4, vec_float4, vec_ctu) + +// converts single and double precision to signed doubleword +#ifdef vec_ctsl +# undef vec_ctsl +#endif +VSX_IMPL_CONVERT(vec_dword2, vec_double2, vec_ctsl) +// __builtin_convertvector unable to convert, xvcvspsxds is missing on it +VSX_FINLINE(vec_dword2) vec_ctslo(const vec_float4& a) +{ return vec_ctsl(vec_cvfo(a)); } + +// converts single and double precision to unsigned doubleword +#ifdef vec_ctul +# undef vec_ctul +#endif +VSX_IMPL_CONVERT(vec_udword2, vec_double2, vec_ctul) +// __builtin_convertvector unable to convert, xvcvspuxds is missing on it +VSX_FINLINE(vec_udword2) vec_ctulo(const vec_float4& a) +{ return vec_ctul(vec_cvfo(a)); } + +#endif // CLANG VSX compatibility + +/* + * Common GCC, CLANG compatibility +**/ +#if defined(__GNUG__) && !defined(__IBMCPP__) + +#ifdef vec_cvf +# undef vec_cvf +#endif + +#define VSX_IMPL_CONV_EVEN_4_2(rt, rg, fnm, fn2) \ +VSX_FINLINE(rt) fnm(const rg& a) \ +{ return fn2(vec_sldw(a, a, 1)); } + +VSX_IMPL_CONV_EVEN_4_2(vec_double2, vec_float4, vec_cvf, vec_cvfo) +VSX_IMPL_CONV_EVEN_4_2(vec_double2, vec_int4, vec_ctd, vec_ctdo) +VSX_IMPL_CONV_EVEN_4_2(vec_double2, vec_uint4, vec_ctd, vec_ctdo) + +VSX_IMPL_CONV_EVEN_4_2(vec_dword2, vec_float4, vec_ctsl, vec_ctslo) +VSX_IMPL_CONV_EVEN_4_2(vec_udword2, vec_float4, vec_ctul, vec_ctulo) + +#define VSX_IMPL_CONV_EVEN_2_4(rt, rg, fnm, fn2) \ +VSX_FINLINE(rt) fnm(const rg& a) \ +{ \ + rt v4 = fn2(a); \ + return vec_sldw(v4, v4, 3); \ +} + +VSX_IMPL_CONV_EVEN_2_4(vec_float4, vec_double2, vec_cvf, vec_cvfo) +VSX_IMPL_CONV_EVEN_2_4(vec_float4, vec_dword2, vec_ctf, vec_ctfo) +VSX_IMPL_CONV_EVEN_2_4(vec_float4, vec_udword2, vec_ctf, vec_ctfo) + +VSX_IMPL_CONV_EVEN_2_4(vec_int4, vec_double2, vec_cts, vec_ctso) +VSX_IMPL_CONV_EVEN_2_4(vec_uint4, vec_double2, vec_ctu, vec_ctuo) + +// Only for Eigen! +/* + * changing behavior of conversion intrinsics for gcc has effect on Eigen + * so we redefine old behavior again only on gcc, clang +*/ +#if !defined(__clang__) || __clang_major__ > 4 + // ignoring second arg since Eigen only truncates toward zero +# define VSX_IMPL_CONV_2VARIANT(rt, rg, fnm, fn2) \ + VSX_FINLINE(rt) fnm(const rg& a, int only_truncate) \ + { \ + assert(only_truncate == 0); \ + CV_UNUSED(only_truncate); \ + return fn2(a); \ + } + VSX_IMPL_CONV_2VARIANT(vec_int4, vec_float4, vec_cts, vec_cts) + VSX_IMPL_CONV_2VARIANT(vec_float4, vec_int4, vec_ctf, vec_ctf) + // define vec_cts for converting double precision to signed doubleword + // which isn't compatible with xlc but its okay since Eigen only uses it for gcc + VSX_IMPL_CONV_2VARIANT(vec_dword2, vec_double2, vec_cts, vec_ctsl) +#endif // Eigen + +#endif // Common GCC, CLANG compatibility + +/* + * XLC VSX compatibility +**/ +#if defined(__IBMCPP__) + +// vector population count +#define vec_popcntu vec_popcnt + +// overload and redirect with setting second arg to zero +// since we only support conversions without the second arg +#define VSX_IMPL_OVERLOAD_Z2(rt, rg, fnm) \ +VSX_FINLINE(rt) fnm(const rg& a) { return fnm(a, 0); } + +VSX_IMPL_OVERLOAD_Z2(vec_double2, vec_int4, vec_ctd) +VSX_IMPL_OVERLOAD_Z2(vec_double2, vec_uint4, vec_ctd) +VSX_IMPL_OVERLOAD_Z2(vec_double2, vec_dword2, vec_ctd) +VSX_IMPL_OVERLOAD_Z2(vec_double2, vec_udword2, vec_ctd) + +VSX_IMPL_OVERLOAD_Z2(vec_float4, vec_int4, vec_ctf) +VSX_IMPL_OVERLOAD_Z2(vec_float4, vec_uint4, vec_ctf) +VSX_IMPL_OVERLOAD_Z2(vec_float4, vec_dword2, vec_ctf) +VSX_IMPL_OVERLOAD_Z2(vec_float4, vec_udword2, vec_ctf) + +VSX_IMPL_OVERLOAD_Z2(vec_int4, vec_double2, vec_cts) +VSX_IMPL_OVERLOAD_Z2(vec_int4, vec_float4, vec_cts) + +VSX_IMPL_OVERLOAD_Z2(vec_uint4, vec_double2, vec_ctu) +VSX_IMPL_OVERLOAD_Z2(vec_uint4, vec_float4, vec_ctu) + +VSX_IMPL_OVERLOAD_Z2(vec_dword2, vec_double2, vec_ctsl) +VSX_IMPL_OVERLOAD_Z2(vec_dword2, vec_float4, vec_ctsl) + +VSX_IMPL_OVERLOAD_Z2(vec_udword2, vec_double2, vec_ctul) +VSX_IMPL_OVERLOAD_Z2(vec_udword2, vec_float4, vec_ctul) + +// fixme: implement conversions of odd-numbered elements in a dirty way +// since xlc doesn't support VSX registers operand in inline asm. +#define VSX_IMPL_CONV_ODD_4_2(rt, rg, fnm, fn2) \ +VSX_FINLINE(rt) fnm(const rg& a) { return fn2(vec_sldw(a, a, 3)); } + +VSX_IMPL_CONV_ODD_4_2(vec_double2, vec_float4, vec_cvfo, vec_cvf) +VSX_IMPL_CONV_ODD_4_2(vec_double2, vec_int4, vec_ctdo, vec_ctd) +VSX_IMPL_CONV_ODD_4_2(vec_double2, vec_uint4, vec_ctdo, vec_ctd) + +VSX_IMPL_CONV_ODD_4_2(vec_dword2, vec_float4, vec_ctslo, vec_ctsl) +VSX_IMPL_CONV_ODD_4_2(vec_udword2, vec_float4, vec_ctulo, vec_ctul) + +#define VSX_IMPL_CONV_ODD_2_4(rt, rg, fnm, fn2) \ +VSX_FINLINE(rt) fnm(const rg& a) \ +{ \ + rt v4 = fn2(a); \ + return vec_sldw(v4, v4, 1); \ +} + +VSX_IMPL_CONV_ODD_2_4(vec_float4, vec_double2, vec_cvfo, vec_cvf) +VSX_IMPL_CONV_ODD_2_4(vec_float4, vec_dword2, vec_ctfo, vec_ctf) +VSX_IMPL_CONV_ODD_2_4(vec_float4, vec_udword2, vec_ctfo, vec_ctf) + +VSX_IMPL_CONV_ODD_2_4(vec_int4, vec_double2, vec_ctso, vec_cts) +VSX_IMPL_CONV_ODD_2_4(vec_uint4, vec_double2, vec_ctuo, vec_ctu) + +#endif // XLC VSX compatibility + +// ignore GCC warning that caused by -Wunused-but-set-variable in rare cases +#if defined(__GNUG__) && !defined(__clang__) +# define VSX_UNUSED(Tvec) Tvec __attribute__((__unused__)) +#else // CLANG, XLC +# define VSX_UNUSED(Tvec) Tvec +#endif + +// gcc can find his way in casting log int and XLC, CLANG ambiguous +#if defined(__clang__) || defined(__IBMCPP__) + VSX_FINLINE(vec_udword2) vec_splats(uint64 v) + { return vec_splats((unsigned long long) v); } + + VSX_FINLINE(vec_dword2) vec_splats(int64 v) + { return vec_splats((long long) v); } + + VSX_FINLINE(vec_udword2) vec_promote(uint64 a, int b) + { return vec_promote((unsigned long long) a, b); } + + VSX_FINLINE(vec_dword2) vec_promote(int64 a, int b) + { return vec_promote((long long) a, b); } +#endif + +/* + * implement vsx_ld(offset, pointer), vsx_st(vector, offset, pointer) + * load and set using offset depend on the pointer type + * + * implement vsx_ldf(offset, pointer), vsx_stf(vector, offset, pointer) + * load and set using offset depend on fixed bytes size + * + * Note: In clang vec_xl and vec_xst fails to load unaligned addresses + * so we are using vec_vsx_ld, vec_vsx_st instead +*/ + +#if defined(__clang__) && !defined(__IBMCPP__) +# define vsx_ldf vec_vsx_ld +# define vsx_stf vec_vsx_st +#else // GCC , XLC +# define vsx_ldf vec_xl +# define vsx_stf vec_xst +#endif + +#define VSX_OFFSET(o, p) ((o) * sizeof(*(p))) +#define vsx_ld(o, p) vsx_ldf(VSX_OFFSET(o, p), p) +#define vsx_st(v, o, p) vsx_stf(v, VSX_OFFSET(o, p), p) + +/* + * implement vsx_ld2(offset, pointer), vsx_st2(vector, offset, pointer) to load and store double words + * In GCC vec_xl and vec_xst it maps to vec_vsx_ld, vec_vsx_st which doesn't support long long + * and in CLANG we are using vec_vsx_ld, vec_vsx_st because vec_xl, vec_xst fails to load unaligned addresses + * + * In XLC vec_xl and vec_xst fail to cast int64(long int) to long long +*/ +#if (defined(__GNUG__) || defined(__clang__)) && !defined(__IBMCPP__) + VSX_FINLINE(vec_udword2) vsx_ld2(long o, const uint64* p) + { return vec_udword2_c(vsx_ldf(VSX_OFFSET(o, p), (unsigned int*)p)); } + + VSX_FINLINE(vec_dword2) vsx_ld2(long o, const int64* p) + { return vec_dword2_c(vsx_ldf(VSX_OFFSET(o, p), (int*)p)); } + + VSX_FINLINE(void) vsx_st2(const vec_udword2& vec, long o, uint64* p) + { vsx_stf(vec_uint4_c(vec), VSX_OFFSET(o, p), (unsigned int*)p); } + + VSX_FINLINE(void) vsx_st2(const vec_dword2& vec, long o, int64* p) + { vsx_stf(vec_int4_c(vec), VSX_OFFSET(o, p), (int*)p); } +#else // XLC + VSX_FINLINE(vec_udword2) vsx_ld2(long o, const uint64* p) + { return vsx_ldf(VSX_OFFSET(o, p), (unsigned long long*)p); } + + VSX_FINLINE(vec_dword2) vsx_ld2(long o, const int64* p) + { return vsx_ldf(VSX_OFFSET(o, p), (long long*)p); } + + VSX_FINLINE(void) vsx_st2(const vec_udword2& vec, long o, uint64* p) + { vsx_stf(vec, VSX_OFFSET(o, p), (unsigned long long*)p); } + + VSX_FINLINE(void) vsx_st2(const vec_dword2& vec, long o, int64* p) + { vsx_stf(vec, VSX_OFFSET(o, p), (long long*)p); } +#endif + +// Store lower 8 byte +#define vec_st_l8(v, p) *((uint64*)(p)) = vec_extract(vec_udword2_c(v), 0) + +// Store higher 8 byte +#define vec_st_h8(v, p) *((uint64*)(p)) = vec_extract(vec_udword2_c(v), 1) + +// Load 64-bits of integer data to lower part +#define VSX_IMPL_LOAD_L8(Tvec, Tp) \ +VSX_FINLINE(Tvec) vec_ld_l8(const Tp *p) \ +{ return ((Tvec)vec_promote(*((uint64*)p), 0)); } + +VSX_IMPL_LOAD_L8(vec_uchar16, uchar) +VSX_IMPL_LOAD_L8(vec_char16, schar) +VSX_IMPL_LOAD_L8(vec_ushort8, ushort) +VSX_IMPL_LOAD_L8(vec_short8, short) +VSX_IMPL_LOAD_L8(vec_uint4, uint) +VSX_IMPL_LOAD_L8(vec_int4, int) +VSX_IMPL_LOAD_L8(vec_float4, float) +VSX_IMPL_LOAD_L8(vec_udword2, uint64) +VSX_IMPL_LOAD_L8(vec_dword2, int64) +VSX_IMPL_LOAD_L8(vec_double2, double) + +// logical not +#define vec_not(a) vec_nor(a, a) + +// power9 yaya +// not equal +#ifndef vec_cmpne +# define vec_cmpne(a, b) vec_not(vec_cmpeq(a, b)) +#endif + +// absolute difference +#ifndef vec_absd +# define vec_absd(a, b) vec_sub(vec_max(a, b), vec_min(a, b)) +#endif + +/* + * Implement vec_unpacklu and vec_unpackhu + * since vec_unpackl, vec_unpackh only support signed integers +**/ +#define VSX_IMPL_UNPACKU(rt, rg, zero) \ +VSX_FINLINE(rt) vec_unpacklu(const rg& a) \ +{ return (rt)(vec_mergel(a, zero)); } \ +VSX_FINLINE(rt) vec_unpackhu(const rg& a) \ +{ return (rt)(vec_mergeh(a, zero)); } + +VSX_IMPL_UNPACKU(vec_ushort8, vec_uchar16, vec_uchar16_z) +VSX_IMPL_UNPACKU(vec_uint4, vec_ushort8, vec_ushort8_z) +VSX_IMPL_UNPACKU(vec_udword2, vec_uint4, vec_uint4_z) + +/* + * Implement vec_mergesqe and vec_mergesqo + * Merges the sequence values of even and odd elements of two vectors +*/ +#define VSX_IMPL_PERM(rt, fnm, ...) \ +VSX_FINLINE(rt) fnm(const rt& a, const rt& b) \ +{ static const vec_uchar16 perm = {__VA_ARGS__}; return vec_perm(a, b, perm); } + +// 16 +#define perm16_mergesqe 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 +#define perm16_mergesqo 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 +VSX_IMPL_PERM(vec_uchar16, vec_mergesqe, perm16_mergesqe) +VSX_IMPL_PERM(vec_uchar16, vec_mergesqo, perm16_mergesqo) +VSX_IMPL_PERM(vec_char16, vec_mergesqe, perm16_mergesqe) +VSX_IMPL_PERM(vec_char16, vec_mergesqo, perm16_mergesqo) +// 8 +#define perm8_mergesqe 0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29 +#define perm8_mergesqo 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 +VSX_IMPL_PERM(vec_ushort8, vec_mergesqe, perm8_mergesqe) +VSX_IMPL_PERM(vec_ushort8, vec_mergesqo, perm8_mergesqo) +VSX_IMPL_PERM(vec_short8, vec_mergesqe, perm8_mergesqe) +VSX_IMPL_PERM(vec_short8, vec_mergesqo, perm8_mergesqo) +// 4 +#define perm4_mergesqe 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27 +#define perm4_mergesqo 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31 +VSX_IMPL_PERM(vec_uint4, vec_mergesqe, perm4_mergesqe) +VSX_IMPL_PERM(vec_uint4, vec_mergesqo, perm4_mergesqo) +VSX_IMPL_PERM(vec_int4, vec_mergesqe, perm4_mergesqe) +VSX_IMPL_PERM(vec_int4, vec_mergesqo, perm4_mergesqo) +VSX_IMPL_PERM(vec_float4, vec_mergesqe, perm4_mergesqe) +VSX_IMPL_PERM(vec_float4, vec_mergesqo, perm4_mergesqo) +// 2 +VSX_REDIRECT_2RG(vec_double2, vec_double2, vec_mergesqe, vec_mergeh) +VSX_REDIRECT_2RG(vec_double2, vec_double2, vec_mergesqo, vec_mergel) +VSX_REDIRECT_2RG(vec_dword2, vec_dword2, vec_mergesqe, vec_mergeh) +VSX_REDIRECT_2RG(vec_dword2, vec_dword2, vec_mergesqo, vec_mergel) +VSX_REDIRECT_2RG(vec_udword2, vec_udword2, vec_mergesqe, vec_mergeh) +VSX_REDIRECT_2RG(vec_udword2, vec_udword2, vec_mergesqo, vec_mergel) + +/* + * Implement vec_mergesqh and vec_mergesql + * Merges the sequence most and least significant halves of two vectors +*/ +#define VSX_IMPL_MERGESQHL(Tvec) \ +VSX_FINLINE(Tvec) vec_mergesqh(const Tvec& a, const Tvec& b) \ +{ return (Tvec)vec_mergeh(vec_udword2_c(a), vec_udword2_c(b)); } \ +VSX_FINLINE(Tvec) vec_mergesql(const Tvec& a, const Tvec& b) \ +{ return (Tvec)vec_mergel(vec_udword2_c(a), vec_udword2_c(b)); } +VSX_IMPL_MERGESQHL(vec_uchar16) +VSX_IMPL_MERGESQHL(vec_char16) +VSX_IMPL_MERGESQHL(vec_ushort8) +VSX_IMPL_MERGESQHL(vec_short8) +VSX_IMPL_MERGESQHL(vec_uint4) +VSX_IMPL_MERGESQHL(vec_int4) +VSX_IMPL_MERGESQHL(vec_float4) +VSX_REDIRECT_2RG(vec_udword2, vec_udword2, vec_mergesqh, vec_mergeh) +VSX_REDIRECT_2RG(vec_udword2, vec_udword2, vec_mergesql, vec_mergel) +VSX_REDIRECT_2RG(vec_dword2, vec_dword2, vec_mergesqh, vec_mergeh) +VSX_REDIRECT_2RG(vec_dword2, vec_dword2, vec_mergesql, vec_mergel) +VSX_REDIRECT_2RG(vec_double2, vec_double2, vec_mergesqh, vec_mergeh) +VSX_REDIRECT_2RG(vec_double2, vec_double2, vec_mergesql, vec_mergel) + + +// 2 and 4 channels interleave for all types except 2 lanes +#define VSX_IMPL_ST_INTERLEAVE(Tp, Tvec) \ +VSX_FINLINE(void) vec_st_interleave(const Tvec& a, const Tvec& b, Tp* ptr) \ +{ \ + vsx_stf(vec_mergeh(a, b), 0, ptr); \ + vsx_stf(vec_mergel(a, b), 16, ptr); \ +} \ +VSX_FINLINE(void) vec_st_interleave(const Tvec& a, const Tvec& b, \ + const Tvec& c, const Tvec& d, Tp* ptr) \ +{ \ + Tvec ac = vec_mergeh(a, c); \ + Tvec bd = vec_mergeh(b, d); \ + vsx_stf(vec_mergeh(ac, bd), 0, ptr); \ + vsx_stf(vec_mergel(ac, bd), 16, ptr); \ + ac = vec_mergel(a, c); \ + bd = vec_mergel(b, d); \ + vsx_stf(vec_mergeh(ac, bd), 32, ptr); \ + vsx_stf(vec_mergel(ac, bd), 48, ptr); \ +} +VSX_IMPL_ST_INTERLEAVE(uchar, vec_uchar16) +VSX_IMPL_ST_INTERLEAVE(schar, vec_char16) +VSX_IMPL_ST_INTERLEAVE(ushort, vec_ushort8) +VSX_IMPL_ST_INTERLEAVE(short, vec_short8) +VSX_IMPL_ST_INTERLEAVE(uint, vec_uint4) +VSX_IMPL_ST_INTERLEAVE(int, vec_int4) +VSX_IMPL_ST_INTERLEAVE(float, vec_float4) + +// 2 and 4 channels deinterleave for 16 lanes +#define VSX_IMPL_ST_DINTERLEAVE_8(Tp, Tvec) \ +VSX_FINLINE(void) vec_ld_deinterleave(const Tp* ptr, Tvec& a, Tvec& b) \ +{ \ + Tvec v0 = vsx_ld(0, ptr); \ + Tvec v1 = vsx_ld(16, ptr); \ + a = vec_mergesqe(v0, v1); \ + b = vec_mergesqo(v0, v1); \ +} \ +VSX_FINLINE(void) vec_ld_deinterleave(const Tp* ptr, Tvec& a, Tvec& b, \ + Tvec& c, Tvec& d) \ +{ \ + Tvec v0 = vsx_ld(0, ptr); \ + Tvec v1 = vsx_ld(16, ptr); \ + Tvec v2 = vsx_ld(32, ptr); \ + Tvec v3 = vsx_ld(48, ptr); \ + Tvec m0 = vec_mergesqe(v0, v1); \ + Tvec m1 = vec_mergesqe(v2, v3); \ + a = vec_mergesqe(m0, m1); \ + c = vec_mergesqo(m0, m1); \ + m0 = vec_mergesqo(v0, v1); \ + m1 = vec_mergesqo(v2, v3); \ + b = vec_mergesqe(m0, m1); \ + d = vec_mergesqo(m0, m1); \ +} +VSX_IMPL_ST_DINTERLEAVE_8(uchar, vec_uchar16) +VSX_IMPL_ST_DINTERLEAVE_8(schar, vec_char16) + +// 2 and 4 channels deinterleave for 8 lanes +#define VSX_IMPL_ST_DINTERLEAVE_16(Tp, Tvec) \ +VSX_FINLINE(void) vec_ld_deinterleave(const Tp* ptr, Tvec& a, Tvec& b) \ +{ \ + Tvec v0 = vsx_ld(0, ptr); \ + Tvec v1 = vsx_ld(8, ptr); \ + a = vec_mergesqe(v0, v1); \ + b = vec_mergesqo(v0, v1); \ +} \ +VSX_FINLINE(void) vec_ld_deinterleave(const Tp* ptr, Tvec& a, Tvec& b, \ + Tvec& c, Tvec& d) \ +{ \ + Tvec v0 = vsx_ld(0, ptr); \ + Tvec v1 = vsx_ld(8, ptr); \ + Tvec m0 = vec_mergeh(v0, v1); \ + Tvec m1 = vec_mergel(v0, v1); \ + Tvec ab0 = vec_mergeh(m0, m1); \ + Tvec cd0 = vec_mergel(m0, m1); \ + v0 = vsx_ld(16, ptr); \ + v1 = vsx_ld(24, ptr); \ + m0 = vec_mergeh(v0, v1); \ + m1 = vec_mergel(v0, v1); \ + Tvec ab1 = vec_mergeh(m0, m1); \ + Tvec cd1 = vec_mergel(m0, m1); \ + a = vec_mergesqh(ab0, ab1); \ + b = vec_mergesql(ab0, ab1); \ + c = vec_mergesqh(cd0, cd1); \ + d = vec_mergesql(cd0, cd1); \ +} +VSX_IMPL_ST_DINTERLEAVE_16(ushort, vec_ushort8) +VSX_IMPL_ST_DINTERLEAVE_16(short, vec_short8) + +// 2 and 4 channels deinterleave for 4 lanes +#define VSX_IMPL_ST_DINTERLEAVE_32(Tp, Tvec) \ +VSX_FINLINE(void) vec_ld_deinterleave(const Tp* ptr, Tvec& a, Tvec& b) \ +{ \ + a = vsx_ld(0, ptr); \ + b = vsx_ld(4, ptr); \ + Tvec m0 = vec_mergeh(a, b); \ + Tvec m1 = vec_mergel(a, b); \ + a = vec_mergeh(m0, m1); \ + b = vec_mergel(m0, m1); \ +} \ +VSX_FINLINE(void) vec_ld_deinterleave(const Tp* ptr, Tvec& a, Tvec& b, \ + Tvec& c, Tvec& d) \ +{ \ + Tvec v0 = vsx_ld(0, ptr); \ + Tvec v1 = vsx_ld(4, ptr); \ + Tvec v2 = vsx_ld(8, ptr); \ + Tvec v3 = vsx_ld(12, ptr); \ + Tvec m0 = vec_mergeh(v0, v2); \ + Tvec m1 = vec_mergeh(v1, v3); \ + a = vec_mergeh(m0, m1); \ + b = vec_mergel(m0, m1); \ + m0 = vec_mergel(v0, v2); \ + m1 = vec_mergel(v1, v3); \ + c = vec_mergeh(m0, m1); \ + d = vec_mergel(m0, m1); \ +} +VSX_IMPL_ST_DINTERLEAVE_32(uint, vec_uint4) +VSX_IMPL_ST_DINTERLEAVE_32(int, vec_int4) +VSX_IMPL_ST_DINTERLEAVE_32(float, vec_float4) + +// 2 and 4 channels interleave and deinterleave for 2 lanes +#define VSX_IMPL_ST_D_INTERLEAVE_64(Tp, Tvec, ld_func, st_func) \ +VSX_FINLINE(void) vec_st_interleave(const Tvec& a, const Tvec& b, Tp* ptr) \ +{ \ + st_func(vec_mergeh(a, b), 0, ptr); \ + st_func(vec_mergel(a, b), 2, ptr); \ +} \ +VSX_FINLINE(void) vec_st_interleave(const Tvec& a, const Tvec& b, \ + const Tvec& c, const Tvec& d, Tp* ptr) \ +{ \ + st_func(vec_mergeh(a, b), 0, ptr); \ + st_func(vec_mergeh(c, d), 2, ptr); \ + st_func(vec_mergel(a, b), 4, ptr); \ + st_func(vec_mergel(c, d), 6, ptr); \ +} \ +VSX_FINLINE(void) vec_ld_deinterleave(const Tp* ptr, Tvec& a, Tvec& b) \ +{ \ + Tvec m0 = ld_func(0, ptr); \ + Tvec m1 = ld_func(2, ptr); \ + a = vec_mergeh(m0, m1); \ + b = vec_mergel(m0, m1); \ +} \ +VSX_FINLINE(void) vec_ld_deinterleave(const Tp* ptr, Tvec& a, Tvec& b, \ + Tvec& c, Tvec& d) \ +{ \ + Tvec v0 = ld_func(0, ptr); \ + Tvec v1 = ld_func(2, ptr); \ + Tvec v2 = ld_func(4, ptr); \ + Tvec v3 = ld_func(6, ptr); \ + a = vec_mergeh(v0, v2); \ + b = vec_mergel(v0, v2); \ + c = vec_mergeh(v1, v3); \ + d = vec_mergel(v1, v3); \ +} +VSX_IMPL_ST_D_INTERLEAVE_64(int64, vec_dword2, vsx_ld2, vsx_st2) +VSX_IMPL_ST_D_INTERLEAVE_64(uint64, vec_udword2, vsx_ld2, vsx_st2) +VSX_IMPL_ST_D_INTERLEAVE_64(double, vec_double2, vsx_ld, vsx_st) + +/* 3 channels */ +#define VSX_IMPL_ST_INTERLEAVE_3CH_16(Tp, Tvec) \ +VSX_FINLINE(void) vec_st_interleave(const Tvec& a, const Tvec& b, \ + const Tvec& c, Tp* ptr) \ +{ \ + static const vec_uchar16 a12 = {0, 16, 0, 1, 17, 0, 2, 18, 0, 3, 19, 0, 4, 20, 0, 5}; \ + static const vec_uchar16 a123 = {0, 1, 16, 3, 4, 17, 6, 7, 18, 9, 10, 19, 12, 13, 20, 15}; \ + vsx_st(vec_perm(vec_perm(a, b, a12), c, a123), 0, ptr); \ + static const vec_uchar16 b12 = {21, 0, 6, 22, 0, 7, 23, 0, 8, 24, 0, 9, 25, 0, 10, 26}; \ + static const vec_uchar16 b123 = {0, 21, 2, 3, 22, 5, 6, 23, 8, 9, 24, 11, 12, 25, 14, 15}; \ + vsx_st(vec_perm(vec_perm(a, b, b12), c, b123), 16, ptr); \ + static const vec_uchar16 c12 = {0, 11, 27, 0, 12, 28, 0, 13, 29, 0, 14, 30, 0, 15, 31, 0}; \ + static const vec_uchar16 c123 = {26, 1, 2, 27, 4, 5, 28, 7, 8, 29, 10, 11, 30, 13, 14, 31}; \ + vsx_st(vec_perm(vec_perm(a, b, c12), c, c123), 32, ptr); \ +} \ +VSX_FINLINE(void) vec_ld_deinterleave(const Tp* ptr, Tvec& a, Tvec& b, Tvec& c) \ +{ \ + Tvec v1 = vsx_ld(0, ptr); \ + Tvec v2 = vsx_ld(16, ptr); \ + Tvec v3 = vsx_ld(32, ptr); \ + static const vec_uchar16 a12_perm = {0, 3, 6, 9, 12, 15, 18, 21, 24, 27, 30, 0, 0, 0, 0, 0}; \ + static const vec_uchar16 a123_perm = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 17, 20, 23, 26, 29}; \ + a = vec_perm(vec_perm(v1, v2, a12_perm), v3, a123_perm); \ + static const vec_uchar16 b12_perm = {1, 4, 7, 10, 13, 16, 19, 22, 25, 28, 31, 0, 0, 0, 0, 0}; \ + static const vec_uchar16 b123_perm = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 18, 21, 24, 27, 30}; \ + b = vec_perm(vec_perm(v1, v2, b12_perm), v3, b123_perm); \ + static const vec_uchar16 c12_perm = {2, 5, 8, 11, 14, 17, 20, 23, 26, 29, 0, 0, 0, 0, 0, 0}; \ + static const vec_uchar16 c123_perm = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 16, 19, 22, 25, 28, 31}; \ + c = vec_perm(vec_perm(v1, v2, c12_perm), v3, c123_perm); \ +} +VSX_IMPL_ST_INTERLEAVE_3CH_16(uchar, vec_uchar16) +VSX_IMPL_ST_INTERLEAVE_3CH_16(schar, vec_char16) + +#define VSX_IMPL_ST_INTERLEAVE_3CH_8(Tp, Tvec) \ +VSX_FINLINE(void) vec_st_interleave(const Tvec& a, const Tvec& b, \ + const Tvec& c, Tp* ptr) \ +{ \ + static const vec_uchar16 a12 = {0, 1, 16, 17, 0, 0, 2, 3, 18, 19, 0, 0, 4, 5, 20, 21}; \ + static const vec_uchar16 a123 = {0, 1, 2, 3, 16, 17, 6, 7, 8, 9, 18, 19, 12, 13, 14, 15}; \ + vsx_st(vec_perm(vec_perm(a, b, a12), c, a123), 0, ptr); \ + static const vec_uchar16 b12 = {0, 0, 6, 7, 22, 23, 0, 0, 8, 9, 24, 25, 0, 0, 10, 11}; \ + static const vec_uchar16 b123 = {20, 21, 2, 3, 4, 5, 22, 23, 8, 9, 10, 11, 24, 25, 14, 15}; \ + vsx_st(vec_perm(vec_perm(a, b, b12), c, b123), 8, ptr); \ + static const vec_uchar16 c12 = {26, 27, 0, 0, 12, 13, 28, 29, 0, 0, 14, 15, 30, 31, 0, 0}; \ + static const vec_uchar16 c123 = {0, 1, 26, 27, 4, 5, 6, 7, 28, 29, 10, 11, 12, 13, 30, 31}; \ + vsx_st(vec_perm(vec_perm(a, b, c12), c, c123), 16, ptr); \ +} \ +VSX_FINLINE(void) vec_ld_deinterleave(const Tp* ptr, Tvec& a, Tvec& b, Tvec& c) \ +{ \ + Tvec v1 = vsx_ld(0, ptr); \ + Tvec v2 = vsx_ld(8, ptr); \ + Tvec v3 = vsx_ld(16, ptr); \ + static const vec_uchar16 a12_perm = {0, 1, 6, 7, 12, 13, 18, 19, 24, 25, 30, 31, 0, 0, 0, 0}; \ + static const vec_uchar16 a123_perm = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 20, 21, 26, 27}; \ + a = vec_perm(vec_perm(v1, v2, a12_perm), v3, a123_perm); \ + static const vec_uchar16 b12_perm = {2, 3, 8, 9, 14, 15, 20, 21, 26, 27, 0, 0, 0, 0, 0, 0}; \ + static const vec_uchar16 b123_perm = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 16, 17, 22, 23, 28, 29}; \ + b = vec_perm(vec_perm(v1, v2, b12_perm), v3, b123_perm); \ + static const vec_uchar16 c12_perm = {4, 5, 10, 11, 16, 17, 22, 23, 28, 29, 0, 0, 0, 0, 0, 0}; \ + static const vec_uchar16 c123_perm = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 18, 19, 24, 25, 30, 31}; \ + c = vec_perm(vec_perm(v1, v2, c12_perm), v3, c123_perm); \ +} +VSX_IMPL_ST_INTERLEAVE_3CH_8(ushort, vec_ushort8) +VSX_IMPL_ST_INTERLEAVE_3CH_8(short, vec_short8) + +#define VSX_IMPL_ST_INTERLEAVE_3CH_4(Tp, Tvec) \ +VSX_FINLINE(void) vec_st_interleave(const Tvec& a, const Tvec& b, \ + const Tvec& c, Tp* ptr) \ +{ \ + Tvec hbc = vec_mergeh(b, c); \ + static const vec_uchar16 ahbc = {0, 1, 2, 3, 16, 17, 18, 19, 20, 21, 22, 23, 4, 5, 6, 7}; \ + vsx_st(vec_perm(a, hbc, ahbc), 0, ptr); \ + Tvec lab = vec_mergel(a, b); \ + vsx_st(vec_sld(lab, hbc, 8), 4, ptr); \ + static const vec_uchar16 clab = {8, 9, 10, 11, 24, 25, 26, 27, 28, 29, 30, 31, 12, 13, 14, 15};\ + vsx_st(vec_perm(c, lab, clab), 8, ptr); \ +} \ +VSX_FINLINE(void) vec_ld_deinterleave(const Tp* ptr, Tvec& a, Tvec& b, Tvec& c) \ +{ \ + Tvec v1 = vsx_ld(0, ptr); \ + Tvec v2 = vsx_ld(4, ptr); \ + Tvec v3 = vsx_ld(8, ptr); \ + static const vec_uchar16 flp = {0, 1, 2, 3, 12, 13, 14, 15, 16, 17, 18, 19, 28, 29, 30, 31}; \ + a = vec_perm(v1, vec_sld(v3, v2, 8), flp); \ + static const vec_uchar16 flp2 = {28, 29, 30, 31, 0, 1, 2, 3, 12, 13, 14, 15, 16, 17, 18, 19}; \ + b = vec_perm(v2, vec_sld(v1, v3, 8), flp2); \ + c = vec_perm(vec_sld(v2, v1, 8), v3, flp); \ +} +VSX_IMPL_ST_INTERLEAVE_3CH_4(uint, vec_uint4) +VSX_IMPL_ST_INTERLEAVE_3CH_4(int, vec_int4) +VSX_IMPL_ST_INTERLEAVE_3CH_4(float, vec_float4) + +#define VSX_IMPL_ST_INTERLEAVE_3CH_2(Tp, Tvec, ld_func, st_func) \ +VSX_FINLINE(void) vec_st_interleave(const Tvec& a, const Tvec& b, \ + const Tvec& c, Tp* ptr) \ +{ \ + st_func(vec_mergeh(a, b), 0, ptr); \ + st_func(vec_permi(c, a, 1), 2, ptr); \ + st_func(vec_mergel(b, c), 4, ptr); \ +} \ +VSX_FINLINE(void) vec_ld_deinterleave(const Tp* ptr, Tvec& a, \ + Tvec& b, Tvec& c) \ +{ \ + Tvec v1 = ld_func(0, ptr); \ + Tvec v2 = ld_func(2, ptr); \ + Tvec v3 = ld_func(4, ptr); \ + a = vec_permi(v1, v2, 1); \ + b = vec_permi(v1, v3, 2); \ + c = vec_permi(v2, v3, 1); \ +} +VSX_IMPL_ST_INTERLEAVE_3CH_2(int64, vec_dword2, vsx_ld2, vsx_st2) +VSX_IMPL_ST_INTERLEAVE_3CH_2(uint64, vec_udword2, vsx_ld2, vsx_st2) +VSX_IMPL_ST_INTERLEAVE_3CH_2(double, vec_double2, vsx_ld, vsx_st) + +#endif // CV_VSX + +//! @} + +#endif // OPENCV_HAL_VSX_UTILS_HPP diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/cvconfig.h b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/cvconfig.h new file mode 100644 index 0000000..8d30931 --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/cvconfig.h @@ -0,0 +1,167 @@ +#ifndef OPENCV_CVCONFIG_H_INCLUDED +#define OPENCV_CVCONFIG_H_INCLUDED + +/* OpenCV compiled as static or dynamic libs */ +/* #undef BUILD_SHARED_LIBS */ + +/* OpenCV intrinsics optimized code */ +#define CV_ENABLE_INTRINSICS + +/* OpenCV additional optimized code */ +/* #undef CV_DISABLE_OPTIMIZATION */ + +/* Compile for 'real' NVIDIA GPU architectures */ +#define CUDA_ARCH_BIN "" + +/* NVIDIA GPU features are used */ +#define CUDA_ARCH_FEATURES "" + +/* Compile for 'virtual' NVIDIA PTX architectures */ +#define CUDA_ARCH_PTX "" + +/* AMD's Basic Linear Algebra Subprograms Library*/ +/* #undef HAVE_CLAMDBLAS */ + +/* AMD's OpenCL Fast Fourier Transform Library*/ +/* #undef HAVE_CLAMDFFT */ + +/* Clp support */ +/* #undef HAVE_CLP */ + +/* Cocoa API */ +/* #undef HAVE_COCOA */ + +/* NVIDIA CUDA Runtime API*/ +/* #undef HAVE_CUDA */ + +/* NVIDIA CUDA Basic Linear Algebra Subprograms (BLAS) API*/ +/* #undef HAVE_CUBLAS */ + +/* NVIDIA CUDA Deep Neural Network (cuDNN) API*/ +/* #undef HAVE_CUDNN */ + +/* NVIDIA CUDA Fast Fourier Transform (FFT) API*/ +/* #undef HAVE_CUFFT */ + +/* DirectX */ +/* #undef HAVE_DIRECTX */ +/* #undef HAVE_DIRECTX_NV12 */ +/* #undef HAVE_D3D11 */ +/* #undef HAVE_D3D10 */ +/* #undef HAVE_D3D9 */ + +/* Eigen Matrix & Linear Algebra Library */ +/* #undef HAVE_EIGEN */ + +/* Geospatial Data Abstraction Library */ +/* #undef HAVE_GDAL */ + +/* GTK+ 2.0 Thread support */ +/* #undef HAVE_GTHREAD */ + +/* GTK+ 2.x toolkit */ +/* #undef HAVE_GTK */ + +/* Halide support */ +/* #undef HAVE_HALIDE */ + +/* Vulkan support */ +/* #undef HAVE_VULKAN */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_INTTYPES_H */ + +/* Intel Integrated Performance Primitives */ +/* #undef HAVE_IPP */ +/* #undef HAVE_IPP_ICV */ +/* #undef HAVE_IPP_IW */ +/* #undef HAVE_IPP_IW_LL */ + +/* JPEG-2000 codec */ +/* #undef HAVE_OPENJPEG */ +/* #undef HAVE_JASPER */ + +/* IJG JPEG codec */ +/* #undef HAVE_JPEG */ + +/* libpng/png.h needs to be included */ +/* #undef HAVE_LIBPNG_PNG_H */ + +/* GDCM DICOM codec */ +/* #undef HAVE_GDCM */ + +/* NVIDIA Video Decoding API*/ +/* #undef HAVE_NVCUVID */ +/* #undef HAVE_NVCUVID_HEADER */ +/* #undef HAVE_DYNLINK_NVCUVID_HEADER */ + +/* NVIDIA Video Encoding API*/ +/* #undef HAVE_NVCUVENC */ + +/* OpenCL Support */ +/* #undef HAVE_OPENCL */ +/* #undef HAVE_OPENCL_STATIC */ +/* #undef HAVE_OPENCL_SVM */ + +/* NVIDIA OpenCL D3D Extensions support */ +/* #undef HAVE_OPENCL_D3D11_NV */ + +/* OpenEXR codec */ +/* #undef HAVE_OPENEXR */ + +/* OpenGL support*/ +/* #undef HAVE_OPENGL */ + +/* PNG codec */ +/* #undef HAVE_PNG */ + +/* Posix threads (pthreads) */ +#define HAVE_PTHREAD + +/* parallel_for with pthreads */ +/* #undef HAVE_PTHREADS_PF */ + +/* Qt support */ +/* #undef HAVE_QT */ + +/* Qt OpenGL support */ +/* #undef HAVE_QT_OPENGL */ + +/* Intel Threading Building Blocks */ +/* #undef HAVE_TBB */ + +/* Ste||ar Group High Performance ParallelX */ +/* #undef HAVE_HPX */ + +/* TIFF codec */ +/* #undef HAVE_TIFF */ + +/* Win32 UI */ +/* #undef HAVE_WIN32UI */ + +/* Define if your processor stores words with the most significant byte + first (like Motorola and SPARC, unlike Intel and VAX). */ +/* #undef WORDS_BIGENDIAN */ + +/* VA library (libva) */ +/* #undef HAVE_VA */ + +/* Intel VA-API/OpenCL */ +/* #undef HAVE_VA_INTEL */ + +/* Lapack */ +/* #undef HAVE_LAPACK */ + +/* Library was compiled with functions instrumentation */ +/* #undef ENABLE_INSTRUMENTATION */ + +/* OpenVX */ +/* #undef HAVE_OPENVX */ + +/* OpenCV trace utilities */ +/* #undef OPENCV_TRACE */ + +/* Library QR-code decoding */ +/* #undef HAVE_QUIRC */ + +#endif // OPENCV_CVCONFIG_H_INCLUDED diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/features2d.hpp b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/features2d.hpp new file mode 100644 index 0000000..829d8ff --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/features2d.hpp @@ -0,0 +1,1546 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef OPENCV_FEATURES_2D_HPP +#define OPENCV_FEATURES_2D_HPP + +#include "opencv2/opencv_modules.hpp" +#include "opencv2/core.hpp" + +#ifdef HAVE_OPENCV_FLANN +#include "opencv2/flann/miniflann.hpp" +#endif + +/** + @defgroup features2d 2D Features Framework + @{ + @defgroup features2d_main Feature Detection and Description + @defgroup features2d_match Descriptor Matchers + +Matchers of keypoint descriptors in OpenCV have wrappers with a common interface that enables you to +easily switch between different algorithms solving the same problem. This section is devoted to +matching descriptors that are represented as vectors in a multidimensional space. All objects that +implement vector descriptor matchers inherit the DescriptorMatcher interface. + +@note + - An example explaining keypoint matching can be found at + opencv_source_code/samples/cpp/descriptor_extractor_matcher.cpp + - An example on descriptor matching evaluation can be found at + opencv_source_code/samples/cpp/detector_descriptor_matcher_evaluation.cpp + - An example on one to many image matching can be found at + opencv_source_code/samples/cpp/matching_to_many_images.cpp + + @defgroup features2d_draw Drawing Function of Keypoints and Matches + @defgroup features2d_category Object Categorization + +This section describes approaches based on local 2D features and used to categorize objects. + +@note + - A complete Bag-Of-Words sample can be found at + opencv_source_code/samples/cpp/bagofwords_classification.cpp + - (Python) An example using the features2D framework to perform object categorization can be + found at opencv_source_code/samples/python/find_obj.py + + @defgroup feature2d_hal Hardware Acceleration Layer + @{ + @defgroup features2d_hal_interface Interface + @} + @} + */ + +namespace cv +{ + +//! @addtogroup features2d +//! @{ + +// //! writes vector of keypoints to the file storage +// CV_EXPORTS void write(FileStorage& fs, const String& name, const std::vector& keypoints); +// //! reads vector of keypoints from the specified file storage node +// CV_EXPORTS void read(const FileNode& node, CV_OUT std::vector& keypoints); + +/** @brief A class filters a vector of keypoints. + + Because now it is difficult to provide a convenient interface for all usage scenarios of the + keypoints filter class, it has only several needed by now static methods. + */ +class CV_EXPORTS KeyPointsFilter +{ +public: + KeyPointsFilter(){} + + /* + * Remove keypoints within borderPixels of an image edge. + */ + static void runByImageBorder( std::vector& keypoints, Size imageSize, int borderSize ); + /* + * Remove keypoints of sizes out of range. + */ + static void runByKeypointSize( std::vector& keypoints, float minSize, + float maxSize=FLT_MAX ); + /* + * Remove keypoints from some image by mask for pixels of this image. + */ + static void runByPixelsMask( std::vector& keypoints, const Mat& mask ); + /* + * Remove duplicated keypoints. + */ + static void removeDuplicated( std::vector& keypoints ); + /* + * Remove duplicated keypoints and sort the remaining keypoints + */ + static void removeDuplicatedSorted( std::vector& keypoints ); + + /* + * Retain the specified number of the best keypoints (according to the response) + */ + static void retainBest( std::vector& keypoints, int npoints ); +}; + + +/************************************ Base Classes ************************************/ + +/** @brief Abstract base class for 2D image feature detectors and descriptor extractors +*/ +#ifdef __EMSCRIPTEN__ +class CV_EXPORTS_W Feature2D : public Algorithm +#else +class CV_EXPORTS_W Feature2D : public virtual Algorithm +#endif +{ +public: + virtual ~Feature2D(); + + /** @brief Detects keypoints in an image (first variant) or image set (second variant). + + @param image Image. + @param keypoints The detected keypoints. In the second variant of the method keypoints[i] is a set + of keypoints detected in images[i] . + @param mask Mask specifying where to look for keypoints (optional). It must be a 8-bit integer + matrix with non-zero values in the region of interest. + */ + CV_WRAP virtual void detect( InputArray image, + CV_OUT std::vector& keypoints, + InputArray mask=noArray() ); + + /** @overload + @param images Image set. + @param keypoints The detected keypoints. In the second variant of the method keypoints[i] is a set + of keypoints detected in images[i] . + @param masks Masks for each input image specifying where to look for keypoints (optional). + masks[i] is a mask for images[i]. + */ + CV_WRAP virtual void detect( InputArrayOfArrays images, + CV_OUT std::vector >& keypoints, + InputArrayOfArrays masks=noArray() ); + + /** @brief Computes the descriptors for a set of keypoints detected in an image (first variant) or image set + (second variant). + + @param image Image. + @param keypoints Input collection of keypoints. Keypoints for which a descriptor cannot be + computed are removed. Sometimes new keypoints can be added, for example: SIFT duplicates keypoint + with several dominant orientations (for each orientation). + @param descriptors Computed descriptors. In the second variant of the method descriptors[i] are + descriptors computed for a keypoints[i]. Row j is the keypoints (or keypoints[i]) is the + descriptor for keypoint j-th keypoint. + */ + CV_WRAP virtual void compute( InputArray image, + CV_OUT CV_IN_OUT std::vector& keypoints, + OutputArray descriptors ); + + /** @overload + + @param images Image set. + @param keypoints Input collection of keypoints. Keypoints for which a descriptor cannot be + computed are removed. Sometimes new keypoints can be added, for example: SIFT duplicates keypoint + with several dominant orientations (for each orientation). + @param descriptors Computed descriptors. In the second variant of the method descriptors[i] are + descriptors computed for a keypoints[i]. Row j is the keypoints (or keypoints[i]) is the + descriptor for keypoint j-th keypoint. + */ + CV_WRAP virtual void compute( InputArrayOfArrays images, + CV_OUT CV_IN_OUT std::vector >& keypoints, + OutputArrayOfArrays descriptors ); + + /** Detects keypoints and computes the descriptors */ + CV_WRAP virtual void detectAndCompute( InputArray image, InputArray mask, + CV_OUT std::vector& keypoints, + OutputArray descriptors, + bool useProvidedKeypoints=false ); + + CV_WRAP virtual int descriptorSize() const; + CV_WRAP virtual int descriptorType() const; + CV_WRAP virtual int defaultNorm() const; + + CV_WRAP void write( const String& fileName ) const; + + CV_WRAP void read( const String& fileName ); + + virtual void write( FileStorage&) const CV_OVERRIDE; + + // see corresponding cv::Algorithm method + CV_WRAP virtual void read( const FileNode&) CV_OVERRIDE; + + //! Return true if detector object is empty + CV_WRAP virtual bool empty() const CV_OVERRIDE; + CV_WRAP virtual String getDefaultName() const CV_OVERRIDE; + + // see corresponding cv::Algorithm method + CV_WRAP inline void write(const Ptr& fs, const String& name = String()) const { Algorithm::write(fs, name); } +}; + +/** Feature detectors in OpenCV have wrappers with a common interface that enables you to easily switch +between different algorithms solving the same problem. All objects that implement keypoint detectors +inherit the FeatureDetector interface. */ +typedef Feature2D FeatureDetector; + +/** Extractors of keypoint descriptors in OpenCV have wrappers with a common interface that enables you +to easily switch between different algorithms solving the same problem. This section is devoted to +computing descriptors represented as vectors in a multidimensional space. All objects that implement +the vector descriptor extractors inherit the DescriptorExtractor interface. + */ +typedef Feature2D DescriptorExtractor; + +//! @addtogroup features2d_main +//! @{ + + +/** @brief Class for implementing the wrapper which makes detectors and extractors to be affine invariant, +described as ASIFT in @cite YM11 . +*/ +class CV_EXPORTS_W AffineFeature : public Feature2D +{ +public: + /** + @param backend The detector/extractor you want to use as backend. + @param maxTilt The highest power index of tilt factor. 5 is used in the paper as tilt sampling range n. + @param minTilt The lowest power index of tilt factor. 0 is used in the paper. + @param tiltStep Tilt sampling step \f$\delta_t\f$ in Algorithm 1 in the paper. + @param rotateStepBase Rotation sampling step factor b in Algorithm 1 in the paper. + */ + CV_WRAP static Ptr create(const Ptr& backend, + int maxTilt = 5, int minTilt = 0, float tiltStep = 1.4142135623730951f, float rotateStepBase = 72); + + CV_WRAP virtual void setViewParams(const std::vector& tilts, const std::vector& rolls) = 0; + CV_WRAP virtual void getViewParams(std::vector& tilts, std::vector& rolls) const = 0; + CV_WRAP virtual String getDefaultName() const CV_OVERRIDE; +}; + +typedef AffineFeature AffineFeatureDetector; +typedef AffineFeature AffineDescriptorExtractor; + + +/** @brief Class for extracting keypoints and computing descriptors using the Scale Invariant Feature Transform +(SIFT) algorithm by D. Lowe @cite Lowe04 . +*/ +class CV_EXPORTS_W SIFT : public Feature2D +{ +public: + /** + @param nfeatures The number of best features to retain. The features are ranked by their scores + (measured in SIFT algorithm as the local contrast) + + @param nOctaveLayers The number of layers in each octave. 3 is the value used in D. Lowe paper. The + number of octaves is computed automatically from the image resolution. + + @param contrastThreshold The contrast threshold used to filter out weak features in semi-uniform + (low-contrast) regions. The larger the threshold, the less features are produced by the detector. + + @note The contrast threshold will be divided by nOctaveLayers when the filtering is applied. When + nOctaveLayers is set to default and if you want to use the value used in D. Lowe paper, 0.03, set + this argument to 0.09. + + @param edgeThreshold The threshold used to filter out edge-like features. Note that the its meaning + is different from the contrastThreshold, i.e. the larger the edgeThreshold, the less features are + filtered out (more features are retained). + + @param sigma The sigma of the Gaussian applied to the input image at the octave \#0. If your image + is captured with a weak camera with soft lenses, you might want to reduce the number. + */ + CV_WRAP static Ptr create(int nfeatures = 0, int nOctaveLayers = 3, + double contrastThreshold = 0.04, double edgeThreshold = 10, + double sigma = 1.6); + + /** @brief Create SIFT with specified descriptorType. + @param nfeatures The number of best features to retain. The features are ranked by their scores + (measured in SIFT algorithm as the local contrast) + + @param nOctaveLayers The number of layers in each octave. 3 is the value used in D. Lowe paper. The + number of octaves is computed automatically from the image resolution. + + @param contrastThreshold The contrast threshold used to filter out weak features in semi-uniform + (low-contrast) regions. The larger the threshold, the less features are produced by the detector. + + @note The contrast threshold will be divided by nOctaveLayers when the filtering is applied. When + nOctaveLayers is set to default and if you want to use the value used in D. Lowe paper, 0.03, set + this argument to 0.09. + + @param edgeThreshold The threshold used to filter out edge-like features. Note that the its meaning + is different from the contrastThreshold, i.e. the larger the edgeThreshold, the less features are + filtered out (more features are retained). + + @param sigma The sigma of the Gaussian applied to the input image at the octave \#0. If your image + is captured with a weak camera with soft lenses, you might want to reduce the number. + + @param descriptorType The type of descriptors. Only CV_32F and CV_8U are supported. + */ + CV_WRAP static Ptr create(int nfeatures, int nOctaveLayers, + double contrastThreshold, double edgeThreshold, + double sigma, int descriptorType); + + CV_WRAP virtual String getDefaultName() const CV_OVERRIDE; +}; + +typedef SIFT SiftFeatureDetector; +typedef SIFT SiftDescriptorExtractor; + + +/** @brief Class implementing the BRISK keypoint detector and descriptor extractor, described in @cite LCS11 . + */ +class CV_EXPORTS_W BRISK : public Feature2D +{ +public: + /** @brief The BRISK constructor + + @param thresh AGAST detection threshold score. + @param octaves detection octaves. Use 0 to do single scale. + @param patternScale apply this scale to the pattern used for sampling the neighbourhood of a + keypoint. + */ + CV_WRAP static Ptr create(int thresh=30, int octaves=3, float patternScale=1.0f); + + /** @brief The BRISK constructor for a custom pattern + + @param radiusList defines the radii (in pixels) where the samples around a keypoint are taken (for + keypoint scale 1). + @param numberList defines the number of sampling points on the sampling circle. Must be the same + size as radiusList.. + @param dMax threshold for the short pairings used for descriptor formation (in pixels for keypoint + scale 1). + @param dMin threshold for the long pairings used for orientation determination (in pixels for + keypoint scale 1). + @param indexChange index remapping of the bits. */ + CV_WRAP static Ptr create(const std::vector &radiusList, const std::vector &numberList, + float dMax=5.85f, float dMin=8.2f, const std::vector& indexChange=std::vector()); + + /** @brief The BRISK constructor for a custom pattern, detection threshold and octaves + + @param thresh AGAST detection threshold score. + @param octaves detection octaves. Use 0 to do single scale. + @param radiusList defines the radii (in pixels) where the samples around a keypoint are taken (for + keypoint scale 1). + @param numberList defines the number of sampling points on the sampling circle. Must be the same + size as radiusList.. + @param dMax threshold for the short pairings used for descriptor formation (in pixels for keypoint + scale 1). + @param dMin threshold for the long pairings used for orientation determination (in pixels for + keypoint scale 1). + @param indexChange index remapping of the bits. */ + CV_WRAP static Ptr create(int thresh, int octaves, const std::vector &radiusList, + const std::vector &numberList, float dMax=5.85f, float dMin=8.2f, + const std::vector& indexChange=std::vector()); + CV_WRAP virtual String getDefaultName() const CV_OVERRIDE; + + /** @brief Set detection threshold. + @param threshold AGAST detection threshold score. + */ + CV_WRAP virtual void setThreshold(int threshold) { CV_UNUSED(threshold); return; } + CV_WRAP virtual int getThreshold() const { return -1; } + + /** @brief Set detection octaves. + @param octaves detection octaves. Use 0 to do single scale. + */ + CV_WRAP virtual void setOctaves(int octaves) { CV_UNUSED(octaves); return; } + CV_WRAP virtual int getOctaves() const { return -1; } +}; + +/** @brief Class implementing the ORB (*oriented BRIEF*) keypoint detector and descriptor extractor + +described in @cite RRKB11 . The algorithm uses FAST in pyramids to detect stable keypoints, selects +the strongest features using FAST or Harris response, finds their orientation using first-order +moments and computes the descriptors using BRIEF (where the coordinates of random point pairs (or +k-tuples) are rotated according to the measured orientation). + */ +class CV_EXPORTS_W ORB : public Feature2D +{ +public: + enum ScoreType { HARRIS_SCORE=0, FAST_SCORE=1 }; + static const int kBytes = 32; + + /** @brief The ORB constructor + + @param nfeatures The maximum number of features to retain. + @param scaleFactor Pyramid decimation ratio, greater than 1. scaleFactor==2 means the classical + pyramid, where each next level has 4x less pixels than the previous, but such a big scale factor + will degrade feature matching scores dramatically. On the other hand, too close to 1 scale factor + will mean that to cover certain scale range you will need more pyramid levels and so the speed + will suffer. + @param nlevels The number of pyramid levels. The smallest level will have linear size equal to + input_image_linear_size/pow(scaleFactor, nlevels - firstLevel). + @param edgeThreshold This is size of the border where the features are not detected. It should + roughly match the patchSize parameter. + @param firstLevel The level of pyramid to put source image to. Previous layers are filled + with upscaled source image. + @param WTA_K The number of points that produce each element of the oriented BRIEF descriptor. The + default value 2 means the BRIEF where we take a random point pair and compare their brightnesses, + so we get 0/1 response. Other possible values are 3 and 4. For example, 3 means that we take 3 + random points (of course, those point coordinates are random, but they are generated from the + pre-defined seed, so each element of BRIEF descriptor is computed deterministically from the pixel + rectangle), find point of maximum brightness and output index of the winner (0, 1 or 2). Such + output will occupy 2 bits, and therefore it will need a special variant of Hamming distance, + denoted as NORM_HAMMING2 (2 bits per bin). When WTA_K=4, we take 4 random points to compute each + bin (that will also occupy 2 bits with possible values 0, 1, 2 or 3). + @param scoreType The default HARRIS_SCORE means that Harris algorithm is used to rank features + (the score is written to KeyPoint::score and is used to retain best nfeatures features); + FAST_SCORE is alternative value of the parameter that produces slightly less stable keypoints, + but it is a little faster to compute. + @param patchSize size of the patch used by the oriented BRIEF descriptor. Of course, on smaller + pyramid layers the perceived image area covered by a feature will be larger. + @param fastThreshold the fast threshold + */ + CV_WRAP static Ptr create(int nfeatures=500, float scaleFactor=1.2f, int nlevels=8, int edgeThreshold=31, + int firstLevel=0, int WTA_K=2, ORB::ScoreType scoreType=ORB::HARRIS_SCORE, int patchSize=31, int fastThreshold=20); + + CV_WRAP virtual void setMaxFeatures(int maxFeatures) = 0; + CV_WRAP virtual int getMaxFeatures() const = 0; + + CV_WRAP virtual void setScaleFactor(double scaleFactor) = 0; + CV_WRAP virtual double getScaleFactor() const = 0; + + CV_WRAP virtual void setNLevels(int nlevels) = 0; + CV_WRAP virtual int getNLevels() const = 0; + + CV_WRAP virtual void setEdgeThreshold(int edgeThreshold) = 0; + CV_WRAP virtual int getEdgeThreshold() const = 0; + + CV_WRAP virtual void setFirstLevel(int firstLevel) = 0; + CV_WRAP virtual int getFirstLevel() const = 0; + + CV_WRAP virtual void setWTA_K(int wta_k) = 0; + CV_WRAP virtual int getWTA_K() const = 0; + + CV_WRAP virtual void setScoreType(ORB::ScoreType scoreType) = 0; + CV_WRAP virtual ORB::ScoreType getScoreType() const = 0; + + CV_WRAP virtual void setPatchSize(int patchSize) = 0; + CV_WRAP virtual int getPatchSize() const = 0; + + CV_WRAP virtual void setFastThreshold(int fastThreshold) = 0; + CV_WRAP virtual int getFastThreshold() const = 0; + CV_WRAP virtual String getDefaultName() const CV_OVERRIDE; +}; + +/** @brief Maximally stable extremal region extractor + +The class encapsulates all the parameters of the %MSER extraction algorithm (see [wiki +article](http://en.wikipedia.org/wiki/Maximally_stable_extremal_regions)). + +- there are two different implementation of %MSER: one for grey image, one for color image + +- the grey image algorithm is taken from: @cite nister2008linear ; the paper claims to be faster +than union-find method; it actually get 1.5~2m/s on my centrino L7200 1.2GHz laptop. + +- the color image algorithm is taken from: @cite forssen2007maximally ; it should be much slower +than grey image method ( 3~4 times ); the chi_table.h file is taken directly from paper's source +code which is distributed under GPL. + +- (Python) A complete example showing the use of the %MSER detector can be found at samples/python/mser.py +*/ +class CV_EXPORTS_W MSER : public Feature2D +{ +public: + /** @brief Full constructor for %MSER detector + + @param _delta it compares \f$(size_{i}-size_{i-delta})/size_{i-delta}\f$ + @param _min_area prune the area which smaller than minArea + @param _max_area prune the area which bigger than maxArea + @param _max_variation prune the area have similar size to its children + @param _min_diversity for color image, trace back to cut off mser with diversity less than min_diversity + @param _max_evolution for color image, the evolution steps + @param _area_threshold for color image, the area threshold to cause re-initialize + @param _min_margin for color image, ignore too small margin + @param _edge_blur_size for color image, the aperture size for edge blur + */ + CV_WRAP static Ptr create( int _delta=5, int _min_area=60, int _max_area=14400, + double _max_variation=0.25, double _min_diversity=.2, + int _max_evolution=200, double _area_threshold=1.01, + double _min_margin=0.003, int _edge_blur_size=5 ); + + /** @brief Detect %MSER regions + + @param image input image (8UC1, 8UC3 or 8UC4, must be greater or equal than 3x3) + @param msers resulting list of point sets + @param bboxes resulting bounding boxes + */ + CV_WRAP virtual void detectRegions( InputArray image, + CV_OUT std::vector >& msers, + CV_OUT std::vector& bboxes ) = 0; + + CV_WRAP virtual void setDelta(int delta) = 0; + CV_WRAP virtual int getDelta() const = 0; + + CV_WRAP virtual void setMinArea(int minArea) = 0; + CV_WRAP virtual int getMinArea() const = 0; + + CV_WRAP virtual void setMaxArea(int maxArea) = 0; + CV_WRAP virtual int getMaxArea() const = 0; + + CV_WRAP virtual void setPass2Only(bool f) = 0; + CV_WRAP virtual bool getPass2Only() const = 0; + CV_WRAP virtual String getDefaultName() const CV_OVERRIDE; +}; + +//! @} features2d_main + +//! @addtogroup features2d_main +//! @{ + +/** @brief Wrapping class for feature detection using the FAST method. : + */ +class CV_EXPORTS_W FastFeatureDetector : public Feature2D +{ +public: + enum DetectorType + { + TYPE_5_8 = 0, TYPE_7_12 = 1, TYPE_9_16 = 2 + }; + enum + { + THRESHOLD = 10000, NONMAX_SUPPRESSION=10001, FAST_N=10002 + }; + + + CV_WRAP static Ptr create( int threshold=10, + bool nonmaxSuppression=true, + FastFeatureDetector::DetectorType type=FastFeatureDetector::TYPE_9_16 ); + + CV_WRAP virtual void setThreshold(int threshold) = 0; + CV_WRAP virtual int getThreshold() const = 0; + + CV_WRAP virtual void setNonmaxSuppression(bool f) = 0; + CV_WRAP virtual bool getNonmaxSuppression() const = 0; + + CV_WRAP virtual void setType(FastFeatureDetector::DetectorType type) = 0; + CV_WRAP virtual FastFeatureDetector::DetectorType getType() const = 0; + CV_WRAP virtual String getDefaultName() const CV_OVERRIDE; +}; + +/** @overload */ +CV_EXPORTS void FAST( InputArray image, CV_OUT std::vector& keypoints, + int threshold, bool nonmaxSuppression=true ); + +/** @brief Detects corners using the FAST algorithm + +@param image grayscale image where keypoints (corners) are detected. +@param keypoints keypoints detected on the image. +@param threshold threshold on difference between intensity of the central pixel and pixels of a +circle around this pixel. +@param nonmaxSuppression if true, non-maximum suppression is applied to detected corners +(keypoints). +@param type one of the three neighborhoods as defined in the paper: +FastFeatureDetector::TYPE_9_16, FastFeatureDetector::TYPE_7_12, +FastFeatureDetector::TYPE_5_8 + +Detects corners using the FAST algorithm by @cite Rosten06 . + +@note In Python API, types are given as cv.FAST_FEATURE_DETECTOR_TYPE_5_8, +cv.FAST_FEATURE_DETECTOR_TYPE_7_12 and cv.FAST_FEATURE_DETECTOR_TYPE_9_16. For corner +detection, use cv.FAST.detect() method. + */ +CV_EXPORTS void FAST( InputArray image, CV_OUT std::vector& keypoints, + int threshold, bool nonmaxSuppression, FastFeatureDetector::DetectorType type ); + +//! @} features2d_main + +//! @addtogroup features2d_main +//! @{ + +/** @brief Wrapping class for feature detection using the AGAST method. : + */ +class CV_EXPORTS_W AgastFeatureDetector : public Feature2D +{ +public: + enum DetectorType + { + AGAST_5_8 = 0, AGAST_7_12d = 1, AGAST_7_12s = 2, OAST_9_16 = 3, + }; + + enum + { + THRESHOLD = 10000, NONMAX_SUPPRESSION = 10001, + }; + + CV_WRAP static Ptr create( int threshold=10, + bool nonmaxSuppression=true, + AgastFeatureDetector::DetectorType type = AgastFeatureDetector::OAST_9_16); + + CV_WRAP virtual void setThreshold(int threshold) = 0; + CV_WRAP virtual int getThreshold() const = 0; + + CV_WRAP virtual void setNonmaxSuppression(bool f) = 0; + CV_WRAP virtual bool getNonmaxSuppression() const = 0; + + CV_WRAP virtual void setType(AgastFeatureDetector::DetectorType type) = 0; + CV_WRAP virtual AgastFeatureDetector::DetectorType getType() const = 0; + CV_WRAP virtual String getDefaultName() const CV_OVERRIDE; +}; + +/** @overload */ +CV_EXPORTS void AGAST( InputArray image, CV_OUT std::vector& keypoints, + int threshold, bool nonmaxSuppression=true ); + +/** @brief Detects corners using the AGAST algorithm + +@param image grayscale image where keypoints (corners) are detected. +@param keypoints keypoints detected on the image. +@param threshold threshold on difference between intensity of the central pixel and pixels of a +circle around this pixel. +@param nonmaxSuppression if true, non-maximum suppression is applied to detected corners +(keypoints). +@param type one of the four neighborhoods as defined in the paper: +AgastFeatureDetector::AGAST_5_8, AgastFeatureDetector::AGAST_7_12d, +AgastFeatureDetector::AGAST_7_12s, AgastFeatureDetector::OAST_9_16 + +For non-Intel platforms, there is a tree optimised variant of AGAST with same numerical results. +The 32-bit binary tree tables were generated automatically from original code using perl script. +The perl script and examples of tree generation are placed in features2d/doc folder. +Detects corners using the AGAST algorithm by @cite mair2010_agast . + + */ +CV_EXPORTS void AGAST( InputArray image, CV_OUT std::vector& keypoints, + int threshold, bool nonmaxSuppression, AgastFeatureDetector::DetectorType type ); + +/** @brief Wrapping class for feature detection using the goodFeaturesToTrack function. : + */ +class CV_EXPORTS_W GFTTDetector : public Feature2D +{ +public: + CV_WRAP static Ptr create( int maxCorners=1000, double qualityLevel=0.01, double minDistance=1, + int blockSize=3, bool useHarrisDetector=false, double k=0.04 ); + CV_WRAP static Ptr create( int maxCorners, double qualityLevel, double minDistance, + int blockSize, int gradiantSize, bool useHarrisDetector=false, double k=0.04 ); + CV_WRAP virtual void setMaxFeatures(int maxFeatures) = 0; + CV_WRAP virtual int getMaxFeatures() const = 0; + + CV_WRAP virtual void setQualityLevel(double qlevel) = 0; + CV_WRAP virtual double getQualityLevel() const = 0; + + CV_WRAP virtual void setMinDistance(double minDistance) = 0; + CV_WRAP virtual double getMinDistance() const = 0; + + CV_WRAP virtual void setBlockSize(int blockSize) = 0; + CV_WRAP virtual int getBlockSize() const = 0; + + CV_WRAP virtual void setHarrisDetector(bool val) = 0; + CV_WRAP virtual bool getHarrisDetector() const = 0; + + CV_WRAP virtual void setK(double k) = 0; + CV_WRAP virtual double getK() const = 0; + CV_WRAP virtual String getDefaultName() const CV_OVERRIDE; +}; + +/** @brief Class for extracting blobs from an image. : + +The class implements a simple algorithm for extracting blobs from an image: + +1. Convert the source image to binary images by applying thresholding with several thresholds from + minThreshold (inclusive) to maxThreshold (exclusive) with distance thresholdStep between + neighboring thresholds. +2. Extract connected components from every binary image by findContours and calculate their + centers. +3. Group centers from several binary images by their coordinates. Close centers form one group that + corresponds to one blob, which is controlled by the minDistBetweenBlobs parameter. +4. From the groups, estimate final centers of blobs and their radiuses and return as locations and + sizes of keypoints. + +This class performs several filtrations of returned blobs. You should set filterBy\* to true/false +to turn on/off corresponding filtration. Available filtrations: + +- **By color**. This filter compares the intensity of a binary image at the center of a blob to +blobColor. If they differ, the blob is filtered out. Use blobColor = 0 to extract dark blobs +and blobColor = 255 to extract light blobs. +- **By area**. Extracted blobs have an area between minArea (inclusive) and maxArea (exclusive). +- **By circularity**. Extracted blobs have circularity +(\f$\frac{4*\pi*Area}{perimeter * perimeter}\f$) between minCircularity (inclusive) and +maxCircularity (exclusive). +- **By ratio of the minimum inertia to maximum inertia**. Extracted blobs have this ratio +between minInertiaRatio (inclusive) and maxInertiaRatio (exclusive). +- **By convexity**. Extracted blobs have convexity (area / area of blob convex hull) between +minConvexity (inclusive) and maxConvexity (exclusive). + +Default values of parameters are tuned to extract dark circular blobs. + */ +class CV_EXPORTS_W SimpleBlobDetector : public Feature2D +{ +public: + struct CV_EXPORTS_W_SIMPLE Params + { + CV_WRAP Params(); + CV_PROP_RW float thresholdStep; + CV_PROP_RW float minThreshold; + CV_PROP_RW float maxThreshold; + CV_PROP_RW size_t minRepeatability; + CV_PROP_RW float minDistBetweenBlobs; + + CV_PROP_RW bool filterByColor; + CV_PROP_RW uchar blobColor; + + CV_PROP_RW bool filterByArea; + CV_PROP_RW float minArea, maxArea; + + CV_PROP_RW bool filterByCircularity; + CV_PROP_RW float minCircularity, maxCircularity; + + CV_PROP_RW bool filterByInertia; + CV_PROP_RW float minInertiaRatio, maxInertiaRatio; + + CV_PROP_RW bool filterByConvexity; + CV_PROP_RW float minConvexity, maxConvexity; + + void read( const FileNode& fn ); + void write( FileStorage& fs ) const; + }; + + CV_WRAP static Ptr + create(const SimpleBlobDetector::Params ¶meters = SimpleBlobDetector::Params()); + CV_WRAP virtual String getDefaultName() const CV_OVERRIDE; +}; + +//! @} features2d_main + +//! @addtogroup features2d_main +//! @{ + +/** @brief Class implementing the KAZE keypoint detector and descriptor extractor, described in @cite ABD12 . + +@note AKAZE descriptor can only be used with KAZE or AKAZE keypoints .. [ABD12] KAZE Features. Pablo +F. Alcantarilla, Adrien Bartoli and Andrew J. Davison. In European Conference on Computer Vision +(ECCV), Fiorenze, Italy, October 2012. +*/ +class CV_EXPORTS_W KAZE : public Feature2D +{ +public: + enum DiffusivityType + { + DIFF_PM_G1 = 0, + DIFF_PM_G2 = 1, + DIFF_WEICKERT = 2, + DIFF_CHARBONNIER = 3 + }; + + /** @brief The KAZE constructor + + @param extended Set to enable extraction of extended (128-byte) descriptor. + @param upright Set to enable use of upright descriptors (non rotation-invariant). + @param threshold Detector response threshold to accept point + @param nOctaves Maximum octave evolution of the image + @param nOctaveLayers Default number of sublevels per scale level + @param diffusivity Diffusivity type. DIFF_PM_G1, DIFF_PM_G2, DIFF_WEICKERT or + DIFF_CHARBONNIER + */ + CV_WRAP static Ptr create(bool extended=false, bool upright=false, + float threshold = 0.001f, + int nOctaves = 4, int nOctaveLayers = 4, + KAZE::DiffusivityType diffusivity = KAZE::DIFF_PM_G2); + + CV_WRAP virtual void setExtended(bool extended) = 0; + CV_WRAP virtual bool getExtended() const = 0; + + CV_WRAP virtual void setUpright(bool upright) = 0; + CV_WRAP virtual bool getUpright() const = 0; + + CV_WRAP virtual void setThreshold(double threshold) = 0; + CV_WRAP virtual double getThreshold() const = 0; + + CV_WRAP virtual void setNOctaves(int octaves) = 0; + CV_WRAP virtual int getNOctaves() const = 0; + + CV_WRAP virtual void setNOctaveLayers(int octaveLayers) = 0; + CV_WRAP virtual int getNOctaveLayers() const = 0; + + CV_WRAP virtual void setDiffusivity(KAZE::DiffusivityType diff) = 0; + CV_WRAP virtual KAZE::DiffusivityType getDiffusivity() const = 0; + CV_WRAP virtual String getDefaultName() const CV_OVERRIDE; +}; + +/** @brief Class implementing the AKAZE keypoint detector and descriptor extractor, described in @cite ANB13. + +@details AKAZE descriptors can only be used with KAZE or AKAZE keypoints. This class is thread-safe. + +@note When you need descriptors use Feature2D::detectAndCompute, which +provides better performance. When using Feature2D::detect followed by +Feature2D::compute scale space pyramid is computed twice. + +@note AKAZE implements T-API. When image is passed as UMat some parts of the algorithm +will use OpenCL. + +@note [ANB13] Fast Explicit Diffusion for Accelerated Features in Nonlinear +Scale Spaces. Pablo F. Alcantarilla, Jesús Nuevo and Adrien Bartoli. In +British Machine Vision Conference (BMVC), Bristol, UK, September 2013. + +*/ +class CV_EXPORTS_W AKAZE : public Feature2D +{ +public: + // AKAZE descriptor type + enum DescriptorType + { + DESCRIPTOR_KAZE_UPRIGHT = 2, ///< Upright descriptors, not invariant to rotation + DESCRIPTOR_KAZE = 3, + DESCRIPTOR_MLDB_UPRIGHT = 4, ///< Upright descriptors, not invariant to rotation + DESCRIPTOR_MLDB = 5 + }; + + /** @brief The AKAZE constructor + + @param descriptor_type Type of the extracted descriptor: DESCRIPTOR_KAZE, + DESCRIPTOR_KAZE_UPRIGHT, DESCRIPTOR_MLDB or DESCRIPTOR_MLDB_UPRIGHT. + @param descriptor_size Size of the descriptor in bits. 0 -\> Full size + @param descriptor_channels Number of channels in the descriptor (1, 2, 3) + @param threshold Detector response threshold to accept point + @param nOctaves Maximum octave evolution of the image + @param nOctaveLayers Default number of sublevels per scale level + @param diffusivity Diffusivity type. DIFF_PM_G1, DIFF_PM_G2, DIFF_WEICKERT or + DIFF_CHARBONNIER + */ + CV_WRAP static Ptr create(AKAZE::DescriptorType descriptor_type = AKAZE::DESCRIPTOR_MLDB, + int descriptor_size = 0, int descriptor_channels = 3, + float threshold = 0.001f, int nOctaves = 4, + int nOctaveLayers = 4, KAZE::DiffusivityType diffusivity = KAZE::DIFF_PM_G2); + + CV_WRAP virtual void setDescriptorType(AKAZE::DescriptorType dtype) = 0; + CV_WRAP virtual AKAZE::DescriptorType getDescriptorType() const = 0; + + CV_WRAP virtual void setDescriptorSize(int dsize) = 0; + CV_WRAP virtual int getDescriptorSize() const = 0; + + CV_WRAP virtual void setDescriptorChannels(int dch) = 0; + CV_WRAP virtual int getDescriptorChannels() const = 0; + + CV_WRAP virtual void setThreshold(double threshold) = 0; + CV_WRAP virtual double getThreshold() const = 0; + + CV_WRAP virtual void setNOctaves(int octaves) = 0; + CV_WRAP virtual int getNOctaves() const = 0; + + CV_WRAP virtual void setNOctaveLayers(int octaveLayers) = 0; + CV_WRAP virtual int getNOctaveLayers() const = 0; + + CV_WRAP virtual void setDiffusivity(KAZE::DiffusivityType diff) = 0; + CV_WRAP virtual KAZE::DiffusivityType getDiffusivity() const = 0; + CV_WRAP virtual String getDefaultName() const CV_OVERRIDE; +}; + +//! @} features2d_main + +/****************************************************************************************\ +* Distance * +\****************************************************************************************/ + +template +struct CV_EXPORTS Accumulator +{ + typedef T Type; +}; + +template<> struct Accumulator { typedef float Type; }; +template<> struct Accumulator { typedef float Type; }; +template<> struct Accumulator { typedef float Type; }; +template<> struct Accumulator { typedef float Type; }; + +/* + * Squared Euclidean distance functor + */ +template +struct CV_EXPORTS SL2 +{ + static const NormTypes normType = NORM_L2SQR; + typedef T ValueType; + typedef typename Accumulator::Type ResultType; + + ResultType operator()( const T* a, const T* b, int size ) const + { + return normL2Sqr(a, b, size); + } +}; + +/* + * Euclidean distance functor + */ +template +struct L2 +{ + static const NormTypes normType = NORM_L2; + typedef T ValueType; + typedef typename Accumulator::Type ResultType; + + ResultType operator()( const T* a, const T* b, int size ) const + { + return (ResultType)std::sqrt((double)normL2Sqr(a, b, size)); + } +}; + +/* + * Manhattan distance (city block distance) functor + */ +template +struct L1 +{ + static const NormTypes normType = NORM_L1; + typedef T ValueType; + typedef typename Accumulator::Type ResultType; + + ResultType operator()( const T* a, const T* b, int size ) const + { + return normL1(a, b, size); + } +}; + +/****************************************************************************************\ +* DescriptorMatcher * +\****************************************************************************************/ + +//! @addtogroup features2d_match +//! @{ + +/** @brief Abstract base class for matching keypoint descriptors. + +It has two groups of match methods: for matching descriptors of an image with another image or with +an image set. + */ +class CV_EXPORTS_W DescriptorMatcher : public Algorithm +{ +public: + enum MatcherType + { + FLANNBASED = 1, + BRUTEFORCE = 2, + BRUTEFORCE_L1 = 3, + BRUTEFORCE_HAMMING = 4, + BRUTEFORCE_HAMMINGLUT = 5, + BRUTEFORCE_SL2 = 6 + }; + + virtual ~DescriptorMatcher(); + + /** @brief Adds descriptors to train a CPU(trainDescCollectionis) or GPU(utrainDescCollectionis) descriptor + collection. + + If the collection is not empty, the new descriptors are added to existing train descriptors. + + @param descriptors Descriptors to add. Each descriptors[i] is a set of descriptors from the same + train image. + */ + CV_WRAP virtual void add( InputArrayOfArrays descriptors ); + + /** @brief Returns a constant link to the train descriptor collection trainDescCollection . + */ + CV_WRAP const std::vector& getTrainDescriptors() const; + + /** @brief Clears the train descriptor collections. + */ + CV_WRAP virtual void clear() CV_OVERRIDE; + + /** @brief Returns true if there are no train descriptors in the both collections. + */ + CV_WRAP virtual bool empty() const CV_OVERRIDE; + + /** @brief Returns true if the descriptor matcher supports masking permissible matches. + */ + CV_WRAP virtual bool isMaskSupported() const = 0; + + /** @brief Trains a descriptor matcher + + Trains a descriptor matcher (for example, the flann index). In all methods to match, the method + train() is run every time before matching. Some descriptor matchers (for example, BruteForceMatcher) + have an empty implementation of this method. Other matchers really train their inner structures (for + example, FlannBasedMatcher trains flann::Index ). + */ + CV_WRAP virtual void train(); + + /** @brief Finds the best match for each descriptor from a query set. + + @param queryDescriptors Query set of descriptors. + @param trainDescriptors Train set of descriptors. This set is not added to the train descriptors + collection stored in the class object. + @param matches Matches. If a query descriptor is masked out in mask , no match is added for this + descriptor. So, matches size may be smaller than the query descriptors count. + @param mask Mask specifying permissible matches between an input query and train matrices of + descriptors. + + In the first variant of this method, the train descriptors are passed as an input argument. In the + second variant of the method, train descriptors collection that was set by DescriptorMatcher::add is + used. Optional mask (or masks) can be passed to specify which query and training descriptors can be + matched. Namely, queryDescriptors[i] can be matched with trainDescriptors[j] only if + mask.at\(i,j) is non-zero. + */ + CV_WRAP void match( InputArray queryDescriptors, InputArray trainDescriptors, + CV_OUT std::vector& matches, InputArray mask=noArray() ) const; + + /** @brief Finds the k best matches for each descriptor from a query set. + + @param queryDescriptors Query set of descriptors. + @param trainDescriptors Train set of descriptors. This set is not added to the train descriptors + collection stored in the class object. + @param mask Mask specifying permissible matches between an input query and train matrices of + descriptors. + @param matches Matches. Each matches[i] is k or less matches for the same query descriptor. + @param k Count of best matches found per each query descriptor or less if a query descriptor has + less than k possible matches in total. + @param compactResult Parameter used when the mask (or masks) is not empty. If compactResult is + false, the matches vector has the same size as queryDescriptors rows. If compactResult is true, + the matches vector does not contain matches for fully masked-out query descriptors. + + These extended variants of DescriptorMatcher::match methods find several best matches for each query + descriptor. The matches are returned in the distance increasing order. See DescriptorMatcher::match + for the details about query and train descriptors. + */ + CV_WRAP void knnMatch( InputArray queryDescriptors, InputArray trainDescriptors, + CV_OUT std::vector >& matches, int k, + InputArray mask=noArray(), bool compactResult=false ) const; + + /** @brief For each query descriptor, finds the training descriptors not farther than the specified distance. + + @param queryDescriptors Query set of descriptors. + @param trainDescriptors Train set of descriptors. This set is not added to the train descriptors + collection stored in the class object. + @param matches Found matches. + @param compactResult Parameter used when the mask (or masks) is not empty. If compactResult is + false, the matches vector has the same size as queryDescriptors rows. If compactResult is true, + the matches vector does not contain matches for fully masked-out query descriptors. + @param maxDistance Threshold for the distance between matched descriptors. Distance means here + metric distance (e.g. Hamming distance), not the distance between coordinates (which is measured + in Pixels)! + @param mask Mask specifying permissible matches between an input query and train matrices of + descriptors. + + For each query descriptor, the methods find such training descriptors that the distance between the + query descriptor and the training descriptor is equal or smaller than maxDistance. Found matches are + returned in the distance increasing order. + */ + CV_WRAP void radiusMatch( InputArray queryDescriptors, InputArray trainDescriptors, + CV_OUT std::vector >& matches, float maxDistance, + InputArray mask=noArray(), bool compactResult=false ) const; + + /** @overload + @param queryDescriptors Query set of descriptors. + @param matches Matches. If a query descriptor is masked out in mask , no match is added for this + descriptor. So, matches size may be smaller than the query descriptors count. + @param masks Set of masks. Each masks[i] specifies permissible matches between the input query + descriptors and stored train descriptors from the i-th image trainDescCollection[i]. + */ + CV_WRAP void match( InputArray queryDescriptors, CV_OUT std::vector& matches, + InputArrayOfArrays masks=noArray() ); + /** @overload + @param queryDescriptors Query set of descriptors. + @param matches Matches. Each matches[i] is k or less matches for the same query descriptor. + @param k Count of best matches found per each query descriptor or less if a query descriptor has + less than k possible matches in total. + @param masks Set of masks. Each masks[i] specifies permissible matches between the input query + descriptors and stored train descriptors from the i-th image trainDescCollection[i]. + @param compactResult Parameter used when the mask (or masks) is not empty. If compactResult is + false, the matches vector has the same size as queryDescriptors rows. If compactResult is true, + the matches vector does not contain matches for fully masked-out query descriptors. + */ + CV_WRAP void knnMatch( InputArray queryDescriptors, CV_OUT std::vector >& matches, int k, + InputArrayOfArrays masks=noArray(), bool compactResult=false ); + /** @overload + @param queryDescriptors Query set of descriptors. + @param matches Found matches. + @param maxDistance Threshold for the distance between matched descriptors. Distance means here + metric distance (e.g. Hamming distance), not the distance between coordinates (which is measured + in Pixels)! + @param masks Set of masks. Each masks[i] specifies permissible matches between the input query + descriptors and stored train descriptors from the i-th image trainDescCollection[i]. + @param compactResult Parameter used when the mask (or masks) is not empty. If compactResult is + false, the matches vector has the same size as queryDescriptors rows. If compactResult is true, + the matches vector does not contain matches for fully masked-out query descriptors. + */ + CV_WRAP void radiusMatch( InputArray queryDescriptors, CV_OUT std::vector >& matches, float maxDistance, + InputArrayOfArrays masks=noArray(), bool compactResult=false ); + + + CV_WRAP void write( const String& fileName ) const + { + FileStorage fs(fileName, FileStorage::WRITE); + write(fs); + } + + CV_WRAP void read( const String& fileName ) + { + FileStorage fs(fileName, FileStorage::READ); + read(fs.root()); + } + // Reads matcher object from a file node + // see corresponding cv::Algorithm method + CV_WRAP virtual void read( const FileNode& ) CV_OVERRIDE; + // Writes matcher object to a file storage + virtual void write( FileStorage& ) const CV_OVERRIDE; + + /** @brief Clones the matcher. + + @param emptyTrainData If emptyTrainData is false, the method creates a deep copy of the object, + that is, copies both parameters and train data. If emptyTrainData is true, the method creates an + object copy with the current parameters but with empty train data. + */ + CV_WRAP virtual Ptr clone( bool emptyTrainData=false ) const = 0; + + /** @brief Creates a descriptor matcher of a given type with the default parameters (using default + constructor). + + @param descriptorMatcherType Descriptor matcher type. Now the following matcher types are + supported: + - `BruteForce` (it uses L2 ) + - `BruteForce-L1` + - `BruteForce-Hamming` + - `BruteForce-Hamming(2)` + - `FlannBased` + */ + CV_WRAP static Ptr create( const String& descriptorMatcherType ); + + CV_WRAP static Ptr create( const DescriptorMatcher::MatcherType& matcherType ); + + + // see corresponding cv::Algorithm method + CV_WRAP inline void write(const Ptr& fs, const String& name = String()) const { Algorithm::write(fs, name); } + +protected: + /** + * Class to work with descriptors from several images as with one merged matrix. + * It is used e.g. in FlannBasedMatcher. + */ + class CV_EXPORTS DescriptorCollection + { + public: + DescriptorCollection(); + DescriptorCollection( const DescriptorCollection& collection ); + virtual ~DescriptorCollection(); + + // Vector of matrices "descriptors" will be merged to one matrix "mergedDescriptors" here. + void set( const std::vector& descriptors ); + virtual void clear(); + + const Mat& getDescriptors() const; + const Mat getDescriptor( int imgIdx, int localDescIdx ) const; + const Mat getDescriptor( int globalDescIdx ) const; + void getLocalIdx( int globalDescIdx, int& imgIdx, int& localDescIdx ) const; + + int size() const; + + protected: + Mat mergedDescriptors; + std::vector startIdxs; + }; + + //! In fact the matching is implemented only by the following two methods. These methods suppose + //! that the class object has been trained already. Public match methods call these methods + //! after calling train(). + virtual void knnMatchImpl( InputArray queryDescriptors, std::vector >& matches, int k, + InputArrayOfArrays masks=noArray(), bool compactResult=false ) = 0; + virtual void radiusMatchImpl( InputArray queryDescriptors, std::vector >& matches, float maxDistance, + InputArrayOfArrays masks=noArray(), bool compactResult=false ) = 0; + + static bool isPossibleMatch( InputArray mask, int queryIdx, int trainIdx ); + static bool isMaskedOut( InputArrayOfArrays masks, int queryIdx ); + + static Mat clone_op( Mat m ) { return m.clone(); } + void checkMasks( InputArrayOfArrays masks, int queryDescriptorsCount ) const; + + //! Collection of descriptors from train images. + std::vector trainDescCollection; + std::vector utrainDescCollection; +}; + +/** @brief Brute-force descriptor matcher. + +For each descriptor in the first set, this matcher finds the closest descriptor in the second set +by trying each one. This descriptor matcher supports masking permissible matches of descriptor +sets. + */ +class CV_EXPORTS_W BFMatcher : public DescriptorMatcher +{ +public: + /** @brief Brute-force matcher constructor (obsolete). Please use BFMatcher.create() + * + * + */ + CV_WRAP BFMatcher( int normType=NORM_L2, bool crossCheck=false ); + + virtual ~BFMatcher() {} + + virtual bool isMaskSupported() const CV_OVERRIDE { return true; } + + /** @brief Brute-force matcher create method. + @param normType One of NORM_L1, NORM_L2, NORM_HAMMING, NORM_HAMMING2. L1 and L2 norms are + preferable choices for SIFT and SURF descriptors, NORM_HAMMING should be used with ORB, BRISK and + BRIEF, NORM_HAMMING2 should be used with ORB when WTA_K==3 or 4 (see ORB::ORB constructor + description). + @param crossCheck If it is false, this is will be default BFMatcher behaviour when it finds the k + nearest neighbors for each query descriptor. If crossCheck==true, then the knnMatch() method with + k=1 will only return pairs (i,j) such that for i-th query descriptor the j-th descriptor in the + matcher's collection is the nearest and vice versa, i.e. the BFMatcher will only return consistent + pairs. Such technique usually produces best results with minimal number of outliers when there are + enough matches. This is alternative to the ratio test, used by D. Lowe in SIFT paper. + */ + CV_WRAP static Ptr create( int normType=NORM_L2, bool crossCheck=false ) ; + + virtual Ptr clone( bool emptyTrainData=false ) const CV_OVERRIDE; +protected: + virtual void knnMatchImpl( InputArray queryDescriptors, std::vector >& matches, int k, + InputArrayOfArrays masks=noArray(), bool compactResult=false ) CV_OVERRIDE; + virtual void radiusMatchImpl( InputArray queryDescriptors, std::vector >& matches, float maxDistance, + InputArrayOfArrays masks=noArray(), bool compactResult=false ) CV_OVERRIDE; + + int normType; + bool crossCheck; +}; + +#if defined(HAVE_OPENCV_FLANN) || defined(CV_DOXYGEN) + +/** @brief Flann-based descriptor matcher. + +This matcher trains cv::flann::Index on a train descriptor collection and calls its nearest search +methods to find the best matches. So, this matcher may be faster when matching a large train +collection than the brute force matcher. FlannBasedMatcher does not support masking permissible +matches of descriptor sets because flann::Index does not support this. : + */ +class CV_EXPORTS_W FlannBasedMatcher : public DescriptorMatcher +{ +public: + CV_WRAP FlannBasedMatcher( const Ptr& indexParams=makePtr(), + const Ptr& searchParams=makePtr() ); + + virtual void add( InputArrayOfArrays descriptors ) CV_OVERRIDE; + virtual void clear() CV_OVERRIDE; + + // Reads matcher object from a file node + virtual void read( const FileNode& ) CV_OVERRIDE; + // Writes matcher object to a file storage + virtual void write( FileStorage& ) const CV_OVERRIDE; + + virtual void train() CV_OVERRIDE; + virtual bool isMaskSupported() const CV_OVERRIDE; + + CV_WRAP static Ptr create(); + + virtual Ptr clone( bool emptyTrainData=false ) const CV_OVERRIDE; +protected: + static void convertToDMatches( const DescriptorCollection& descriptors, + const Mat& indices, const Mat& distances, + std::vector >& matches ); + + virtual void knnMatchImpl( InputArray queryDescriptors, std::vector >& matches, int k, + InputArrayOfArrays masks=noArray(), bool compactResult=false ) CV_OVERRIDE; + virtual void radiusMatchImpl( InputArray queryDescriptors, std::vector >& matches, float maxDistance, + InputArrayOfArrays masks=noArray(), bool compactResult=false ) CV_OVERRIDE; + + Ptr indexParams; + Ptr searchParams; + Ptr flannIndex; + + DescriptorCollection mergedDescriptors; + int addedDescCount; +}; + +#endif + +//! @} features2d_match + +/****************************************************************************************\ +* Drawing functions * +\****************************************************************************************/ + +//! @addtogroup features2d_draw +//! @{ + +enum struct DrawMatchesFlags +{ + DEFAULT = 0, //!< Output image matrix will be created (Mat::create), + //!< i.e. existing memory of output image may be reused. + //!< Two source image, matches and single keypoints will be drawn. + //!< For each keypoint only the center point will be drawn (without + //!< the circle around keypoint with keypoint size and orientation). + DRAW_OVER_OUTIMG = 1, //!< Output image matrix will not be created (Mat::create). + //!< Matches will be drawn on existing content of output image. + NOT_DRAW_SINGLE_POINTS = 2, //!< Single keypoints will not be drawn. + DRAW_RICH_KEYPOINTS = 4 //!< For each keypoint the circle around keypoint with keypoint size and + //!< orientation will be drawn. +}; +CV_ENUM_FLAGS(DrawMatchesFlags) + +/** @brief Draws keypoints. + +@param image Source image. +@param keypoints Keypoints from the source image. +@param outImage Output image. Its content depends on the flags value defining what is drawn in the +output image. See possible flags bit values below. +@param color Color of keypoints. +@param flags Flags setting drawing features. Possible flags bit values are defined by +DrawMatchesFlags. See details above in drawMatches . + +@note +For Python API, flags are modified as cv.DRAW_MATCHES_FLAGS_DEFAULT, +cv.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS, cv.DRAW_MATCHES_FLAGS_DRAW_OVER_OUTIMG, +cv.DRAW_MATCHES_FLAGS_NOT_DRAW_SINGLE_POINTS + */ +CV_EXPORTS_W void drawKeypoints( InputArray image, const std::vector& keypoints, InputOutputArray outImage, + const Scalar& color=Scalar::all(-1), DrawMatchesFlags flags=DrawMatchesFlags::DEFAULT ); + +/** @brief Draws the found matches of keypoints from two images. + +@param img1 First source image. +@param keypoints1 Keypoints from the first source image. +@param img2 Second source image. +@param keypoints2 Keypoints from the second source image. +@param matches1to2 Matches from the first image to the second one, which means that keypoints1[i] +has a corresponding point in keypoints2[matches[i]] . +@param outImg Output image. Its content depends on the flags value defining what is drawn in the +output image. See possible flags bit values below. +@param matchColor Color of matches (lines and connected keypoints). If matchColor==Scalar::all(-1) +, the color is generated randomly. +@param singlePointColor Color of single keypoints (circles), which means that keypoints do not +have the matches. If singlePointColor==Scalar::all(-1) , the color is generated randomly. +@param matchesMask Mask determining which matches are drawn. If the mask is empty, all matches are +drawn. +@param flags Flags setting drawing features. Possible flags bit values are defined by +DrawMatchesFlags. + +This function draws matches of keypoints from two images in the output image. Match is a line +connecting two keypoints (circles). See cv::DrawMatchesFlags. + */ +CV_EXPORTS_W void drawMatches( InputArray img1, const std::vector& keypoints1, + InputArray img2, const std::vector& keypoints2, + const std::vector& matches1to2, InputOutputArray outImg, + const Scalar& matchColor=Scalar::all(-1), const Scalar& singlePointColor=Scalar::all(-1), + const std::vector& matchesMask=std::vector(), DrawMatchesFlags flags=DrawMatchesFlags::DEFAULT ); + +/** @overload */ +CV_EXPORTS_AS(drawMatchesKnn) void drawMatches( InputArray img1, const std::vector& keypoints1, + InputArray img2, const std::vector& keypoints2, + const std::vector >& matches1to2, InputOutputArray outImg, + const Scalar& matchColor=Scalar::all(-1), const Scalar& singlePointColor=Scalar::all(-1), + const std::vector >& matchesMask=std::vector >(), DrawMatchesFlags flags=DrawMatchesFlags::DEFAULT ); + +//! @} features2d_draw + +/****************************************************************************************\ +* Functions to evaluate the feature detectors and [generic] descriptor extractors * +\****************************************************************************************/ + +CV_EXPORTS void evaluateFeatureDetector( const Mat& img1, const Mat& img2, const Mat& H1to2, + std::vector* keypoints1, std::vector* keypoints2, + float& repeatability, int& correspCount, + const Ptr& fdetector=Ptr() ); + +CV_EXPORTS void computeRecallPrecisionCurve( const std::vector >& matches1to2, + const std::vector >& correctMatches1to2Mask, + std::vector& recallPrecisionCurve ); + +CV_EXPORTS float getRecall( const std::vector& recallPrecisionCurve, float l_precision ); +CV_EXPORTS int getNearestPoint( const std::vector& recallPrecisionCurve, float l_precision ); + +/****************************************************************************************\ +* Bag of visual words * +\****************************************************************************************/ + +//! @addtogroup features2d_category +//! @{ + +/** @brief Abstract base class for training the *bag of visual words* vocabulary from a set of descriptors. + +For details, see, for example, *Visual Categorization with Bags of Keypoints* by Gabriella Csurka, +Christopher R. Dance, Lixin Fan, Jutta Willamowski, Cedric Bray, 2004. : + */ +class CV_EXPORTS_W BOWTrainer +{ +public: + BOWTrainer(); + virtual ~BOWTrainer(); + + /** @brief Adds descriptors to a training set. + + @param descriptors Descriptors to add to a training set. Each row of the descriptors matrix is a + descriptor. + + The training set is clustered using clustermethod to construct the vocabulary. + */ + CV_WRAP void add( const Mat& descriptors ); + + /** @brief Returns a training set of descriptors. + */ + CV_WRAP const std::vector& getDescriptors() const; + + /** @brief Returns the count of all descriptors stored in the training set. + */ + CV_WRAP int descriptorsCount() const; + + CV_WRAP virtual void clear(); + + /** @overload */ + CV_WRAP virtual Mat cluster() const = 0; + + /** @brief Clusters train descriptors. + + @param descriptors Descriptors to cluster. Each row of the descriptors matrix is a descriptor. + Descriptors are not added to the inner train descriptor set. + + The vocabulary consists of cluster centers. So, this method returns the vocabulary. In the first + variant of the method, train descriptors stored in the object are clustered. In the second variant, + input descriptors are clustered. + */ + CV_WRAP virtual Mat cluster( const Mat& descriptors ) const = 0; + +protected: + std::vector descriptors; + int size; +}; + +/** @brief kmeans -based class to train visual vocabulary using the *bag of visual words* approach. : + */ +class CV_EXPORTS_W BOWKMeansTrainer : public BOWTrainer +{ +public: + /** @brief The constructor. + + @see cv::kmeans + */ + CV_WRAP BOWKMeansTrainer( int clusterCount, const TermCriteria& termcrit=TermCriteria(), + int attempts=3, int flags=KMEANS_PP_CENTERS ); + virtual ~BOWKMeansTrainer(); + + // Returns trained vocabulary (i.e. cluster centers). + CV_WRAP virtual Mat cluster() const CV_OVERRIDE; + CV_WRAP virtual Mat cluster( const Mat& descriptors ) const CV_OVERRIDE; + +protected: + + int clusterCount; + TermCriteria termcrit; + int attempts; + int flags; +}; + +/** @brief Class to compute an image descriptor using the *bag of visual words*. + +Such a computation consists of the following steps: + +1. Compute descriptors for a given image and its keypoints set. +2. Find the nearest visual words from the vocabulary for each keypoint descriptor. +3. Compute the bag-of-words image descriptor as is a normalized histogram of vocabulary words +encountered in the image. The i-th bin of the histogram is a frequency of i-th word of the +vocabulary in the given image. + */ +class CV_EXPORTS_W BOWImgDescriptorExtractor +{ +public: + /** @brief The constructor. + + @param dextractor Descriptor extractor that is used to compute descriptors for an input image and + its keypoints. + @param dmatcher Descriptor matcher that is used to find the nearest word of the trained vocabulary + for each keypoint descriptor of the image. + */ + CV_WRAP BOWImgDescriptorExtractor( const Ptr& dextractor, + const Ptr& dmatcher ); + /** @overload */ + BOWImgDescriptorExtractor( const Ptr& dmatcher ); + virtual ~BOWImgDescriptorExtractor(); + + /** @brief Sets a visual vocabulary. + + @param vocabulary Vocabulary (can be trained using the inheritor of BOWTrainer ). Each row of the + vocabulary is a visual word (cluster center). + */ + CV_WRAP void setVocabulary( const Mat& vocabulary ); + + /** @brief Returns the set vocabulary. + */ + CV_WRAP const Mat& getVocabulary() const; + + /** @brief Computes an image descriptor using the set visual vocabulary. + + @param image Image, for which the descriptor is computed. + @param keypoints Keypoints detected in the input image. + @param imgDescriptor Computed output image descriptor. + @param pointIdxsOfClusters Indices of keypoints that belong to the cluster. This means that + pointIdxsOfClusters[i] are keypoint indices that belong to the i -th cluster (word of vocabulary) + returned if it is non-zero. + @param descriptors Descriptors of the image keypoints that are returned if they are non-zero. + */ + void compute( InputArray image, std::vector& keypoints, OutputArray imgDescriptor, + std::vector >* pointIdxsOfClusters=0, Mat* descriptors=0 ); + /** @overload + @param keypointDescriptors Computed descriptors to match with vocabulary. + @param imgDescriptor Computed output image descriptor. + @param pointIdxsOfClusters Indices of keypoints that belong to the cluster. This means that + pointIdxsOfClusters[i] are keypoint indices that belong to the i -th cluster (word of vocabulary) + returned if it is non-zero. + */ + void compute( InputArray keypointDescriptors, OutputArray imgDescriptor, + std::vector >* pointIdxsOfClusters=0 ); + // compute() is not constant because DescriptorMatcher::match is not constant + + CV_WRAP_AS(compute) void compute2( const Mat& image, std::vector& keypoints, CV_OUT Mat& imgDescriptor ) + { compute(image,keypoints,imgDescriptor); } + + /** @brief Returns an image descriptor size if the vocabulary is set. Otherwise, it returns 0. + */ + CV_WRAP int descriptorSize() const; + + /** @brief Returns an image descriptor type. + */ + CV_WRAP int descriptorType() const; + +protected: + Mat vocabulary; + Ptr dextractor; + Ptr dmatcher; +}; + +//! @} features2d_category + +//! @} features2d + +} /* namespace cv */ + +#endif diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/features2d/features2d.hpp b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/features2d/features2d.hpp new file mode 100644 index 0000000..e81df0a --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/features2d/features2d.hpp @@ -0,0 +1,48 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Copyright (C) 2013, OpenCV Foundation, all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifdef __OPENCV_BUILD +#error this is a compatibility header which should not be used inside the OpenCV library +#endif + +#include "opencv2/features2d.hpp" diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/features2d/hal/interface.h b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/features2d/hal/interface.h new file mode 100644 index 0000000..bc3b084 --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/features2d/hal/interface.h @@ -0,0 +1,33 @@ +#ifndef OPENCV_FEATURE2D_HAL_INTERFACE_H +#define OPENCV_FEATURE2D_HAL_INTERFACE_H + +#include "opencv2/core/cvdef.h" +//! @addtogroup features2d_hal_interface +//! @{ + +//! @name Fast feature detector types +//! @sa cv::FastFeatureDetector +//! @{ +#define CV_HAL_TYPE_5_8 0 +#define CV_HAL_TYPE_7_12 1 +#define CV_HAL_TYPE_9_16 2 +//! @} + +//! @name Key point +//! @sa cv::KeyPoint +//! @{ +struct CV_EXPORTS cvhalKeyPoint +{ + float x; + float y; + float size; + float angle; + float response; + int octave; + int class_id; +}; +//! @} + +//! @} + +#endif diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/highgui.hpp b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/highgui.hpp new file mode 100644 index 0000000..21560ee --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/highgui.hpp @@ -0,0 +1,17 @@ +// +// Copyright (C) 2021 nihui +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +#include "opencv2/highgui/highgui.hpp" diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/highgui/highgui.hpp b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/highgui/highgui.hpp new file mode 100644 index 0000000..37f0031 --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/highgui/highgui.hpp @@ -0,0 +1,58 @@ +// +// Copyright (C) 2021 nihui +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +#ifndef OPENCV_HIGHGUI_HPP +#define OPENCV_HIGHGUI_HPP + +#include "opencv2/core.hpp" + +enum +{ + CV_LOAD_IMAGE_UNCHANGED = -1, + CV_LOAD_IMAGE_GRAYSCALE = 0, + CV_LOAD_IMAGE_COLOR = 1, +}; + +enum +{ + CV_IMWRITE_JPEG_QUALITY = 1 +}; + +namespace cv { + +enum ImreadModes +{ + IMREAD_UNCHANGED = -1, + IMREAD_GRAYSCALE = 0, + IMREAD_COLOR = 1 +}; + +enum ImwriteFlags +{ + IMWRITE_JPEG_QUALITY = 1 +}; + +CV_EXPORTS_W Mat imread(const String& filename, int flags = IMREAD_COLOR); + +CV_EXPORTS_W bool imwrite(const String& filename, InputArray img, const std::vector& params = std::vector()); + +CV_EXPORTS_W Mat imdecode(InputArray buf, int flags); + +CV_EXPORTS_W bool imencode(const String& ext, InputArray img, CV_OUT std::vector& buf, const std::vector& params = std::vector()); + +} // namespace cv + +#endif // OPENCV_HIGHGUI_HPP diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/imgproc.hpp b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/imgproc.hpp new file mode 100644 index 0000000..c607a18 --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/imgproc.hpp @@ -0,0 +1,4879 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef OPENCV_IMGPROC_HPP +#define OPENCV_IMGPROC_HPP + +#include "opencv2/core.hpp" + +/** + @defgroup imgproc Image Processing + +This module includes image-processing functions. + + @{ + @defgroup imgproc_filter Image Filtering + +Functions and classes described in this section are used to perform various linear or non-linear +filtering operations on 2D images (represented as Mat's). It means that for each pixel location +\f$(x,y)\f$ in the source image (normally, rectangular), its neighborhood is considered and used to +compute the response. In case of a linear filter, it is a weighted sum of pixel values. In case of +morphological operations, it is the minimum or maximum values, and so on. The computed response is +stored in the destination image at the same location \f$(x,y)\f$. It means that the output image +will be of the same size as the input image. Normally, the functions support multi-channel arrays, +in which case every channel is processed independently. Therefore, the output image will also have +the same number of channels as the input one. + +Another common feature of the functions and classes described in this section is that, unlike +simple arithmetic functions, they need to extrapolate values of some non-existing pixels. For +example, if you want to smooth an image using a Gaussian \f$3 \times 3\f$ filter, then, when +processing the left-most pixels in each row, you need pixels to the left of them, that is, outside +of the image. You can let these pixels be the same as the left-most image pixels ("replicated +border" extrapolation method), or assume that all the non-existing pixels are zeros ("constant +border" extrapolation method), and so on. OpenCV enables you to specify the extrapolation method. +For details, see #BorderTypes + +@anchor filter_depths +### Depth combinations +Input depth (src.depth()) | Output depth (ddepth) +--------------------------|---------------------- +CV_8U | -1/CV_16S/CV_32F/CV_64F +CV_16U/CV_16S | -1/CV_32F/CV_64F +CV_32F | -1/CV_32F/CV_64F +CV_64F | -1/CV_64F + +@note when ddepth=-1, the output image will have the same depth as the source. + + @defgroup imgproc_transform Geometric Image Transformations + +The functions in this section perform various geometrical transformations of 2D images. They do not +change the image content but deform the pixel grid and map this deformed grid to the destination +image. In fact, to avoid sampling artifacts, the mapping is done in the reverse order, from +destination to the source. That is, for each pixel \f$(x, y)\f$ of the destination image, the +functions compute coordinates of the corresponding "donor" pixel in the source image and copy the +pixel value: + +\f[\texttt{dst} (x,y)= \texttt{src} (f_x(x,y), f_y(x,y))\f] + +In case when you specify the forward mapping \f$\left: \texttt{src} \rightarrow +\texttt{dst}\f$, the OpenCV functions first compute the corresponding inverse mapping +\f$\left: \texttt{dst} \rightarrow \texttt{src}\f$ and then use the above formula. + +The actual implementations of the geometrical transformations, from the most generic remap and to +the simplest and the fastest resize, need to solve two main problems with the above formula: + +- Extrapolation of non-existing pixels. Similarly to the filtering functions described in the +previous section, for some \f$(x,y)\f$, either one of \f$f_x(x,y)\f$, or \f$f_y(x,y)\f$, or both +of them may fall outside of the image. In this case, an extrapolation method needs to be used. +OpenCV provides the same selection of extrapolation methods as in the filtering functions. In +addition, it provides the method #BORDER_TRANSPARENT. This means that the corresponding pixels in +the destination image will not be modified at all. + +- Interpolation of pixel values. Usually \f$f_x(x,y)\f$ and \f$f_y(x,y)\f$ are floating-point +numbers. This means that \f$\left\f$ can be either an affine or perspective +transformation, or radial lens distortion correction, and so on. So, a pixel value at fractional +coordinates needs to be retrieved. In the simplest case, the coordinates can be just rounded to the +nearest integer coordinates and the corresponding pixel can be used. This is called a +nearest-neighbor interpolation. However, a better result can be achieved by using more +sophisticated [interpolation methods](http://en.wikipedia.org/wiki/Multivariate_interpolation) , +where a polynomial function is fit into some neighborhood of the computed pixel \f$(f_x(x,y), +f_y(x,y))\f$, and then the value of the polynomial at \f$(f_x(x,y), f_y(x,y))\f$ is taken as the +interpolated pixel value. In OpenCV, you can choose between several interpolation methods. See +resize for details. + +@note The geometrical transformations do not work with `CV_8S` or `CV_32S` images. + + @defgroup imgproc_misc Miscellaneous Image Transformations + @defgroup imgproc_draw Drawing Functions + +Drawing functions work with matrices/images of arbitrary depth. The boundaries of the shapes can be +rendered with antialiasing (implemented only for 8-bit images for now). All the functions include +the parameter color that uses an RGB value (that may be constructed with the Scalar constructor ) +for color images and brightness for grayscale images. For color images, the channel ordering is +normally *Blue, Green, Red*. This is what imshow, imread, and imwrite expect. So, if you form a +color using the Scalar constructor, it should look like: + +\f[\texttt{Scalar} (blue \_ component, green \_ component, red \_ component[, alpha \_ component])\f] + +If you are using your own image rendering and I/O functions, you can use any channel ordering. The +drawing functions process each channel independently and do not depend on the channel order or even +on the used color space. The whole image can be converted from BGR to RGB or to a different color +space using cvtColor . + +If a drawn figure is partially or completely outside the image, the drawing functions clip it. Also, +many drawing functions can handle pixel coordinates specified with sub-pixel accuracy. This means +that the coordinates can be passed as fixed-point numbers encoded as integers. The number of +fractional bits is specified by the shift parameter and the real point coordinates are calculated as +\f$\texttt{Point}(x,y)\rightarrow\texttt{Point2f}(x*2^{-shift},y*2^{-shift})\f$ . This feature is +especially effective when rendering antialiased shapes. + +@note The functions do not support alpha-transparency when the target image is 4-channel. In this +case, the color[3] is simply copied to the repainted pixels. Thus, if you want to paint +semi-transparent shapes, you can paint them in a separate buffer and then blend it with the main +image. + + @defgroup imgproc_color_conversions Color Space Conversions + @defgroup imgproc_colormap ColorMaps in OpenCV + +The human perception isn't built for observing fine changes in grayscale images. Human eyes are more +sensitive to observing changes between colors, so you often need to recolor your grayscale images to +get a clue about them. OpenCV now comes with various colormaps to enhance the visualization in your +computer vision application. + +In OpenCV you only need applyColorMap to apply a colormap on a given image. The following sample +code reads the path to an image from command line, applies a Jet colormap on it and shows the +result: + +@include snippets/imgproc_applyColorMap.cpp + +@see #ColormapTypes + + @defgroup imgproc_subdiv2d Planar Subdivision + +The Subdiv2D class described in this section is used to perform various planar subdivision on +a set of 2D points (represented as vector of Point2f). OpenCV subdivides a plane into triangles +using the Delaunay's algorithm, which corresponds to the dual graph of the Voronoi diagram. +In the figure below, the Delaunay's triangulation is marked with black lines and the Voronoi +diagram with red lines. + +![Delaunay triangulation (black) and Voronoi (red)](pics/delaunay_voronoi.png) + +The subdivisions can be used for the 3D piece-wise transformation of a plane, morphing, fast +location of points on the plane, building special graphs (such as NNG,RNG), and so forth. + + @defgroup imgproc_hist Histograms + @defgroup imgproc_shape Structural Analysis and Shape Descriptors + @defgroup imgproc_motion Motion Analysis and Object Tracking + @defgroup imgproc_feature Feature Detection + @defgroup imgproc_object Object Detection + @defgroup imgproc_c C API + @defgroup imgproc_hal Hardware Acceleration Layer + @{ + @defgroup imgproc_hal_functions Functions + @defgroup imgproc_hal_interface Interface + @} + @} +*/ + +namespace cv +{ + +/** @addtogroup imgproc +@{ +*/ + +//! @addtogroup imgproc_filter +//! @{ + +enum SpecialFilter { + FILTER_SCHARR = -1 +}; + +//! type of morphological operation +enum MorphTypes{ + MORPH_ERODE = 0, //!< see #erode + MORPH_DILATE = 1, //!< see #dilate + MORPH_OPEN = 2, //!< an opening operation + //!< \f[\texttt{dst} = \mathrm{open} ( \texttt{src} , \texttt{element} )= \mathrm{dilate} ( \mathrm{erode} ( \texttt{src} , \texttt{element} ))\f] + MORPH_CLOSE = 3, //!< a closing operation + //!< \f[\texttt{dst} = \mathrm{close} ( \texttt{src} , \texttt{element} )= \mathrm{erode} ( \mathrm{dilate} ( \texttt{src} , \texttt{element} ))\f] + MORPH_GRADIENT = 4, //!< a morphological gradient + //!< \f[\texttt{dst} = \mathrm{morph\_grad} ( \texttt{src} , \texttt{element} )= \mathrm{dilate} ( \texttt{src} , \texttt{element} )- \mathrm{erode} ( \texttt{src} , \texttt{element} )\f] + MORPH_TOPHAT = 5, //!< "top hat" + //!< \f[\texttt{dst} = \mathrm{tophat} ( \texttt{src} , \texttt{element} )= \texttt{src} - \mathrm{open} ( \texttt{src} , \texttt{element} )\f] + MORPH_BLACKHAT = 6, //!< "black hat" + //!< \f[\texttt{dst} = \mathrm{blackhat} ( \texttt{src} , \texttt{element} )= \mathrm{close} ( \texttt{src} , \texttt{element} )- \texttt{src}\f] + MORPH_HITMISS = 7 //!< "hit or miss" + //!< .- Only supported for CV_8UC1 binary images. A tutorial can be found in the documentation +}; + +//! shape of the structuring element +enum MorphShapes { + MORPH_RECT = 0, //!< a rectangular structuring element: \f[E_{ij}=1\f] + MORPH_CROSS = 1, //!< a cross-shaped structuring element: + //!< \f[E_{ij} = \begin{cases} 1 & \texttt{if } {i=\texttt{anchor.y } {or } {j=\texttt{anchor.x}}} \\0 & \texttt{otherwise} \end{cases}\f] + MORPH_ELLIPSE = 2 //!< an elliptic structuring element, that is, a filled ellipse inscribed + //!< into the rectangle Rect(0, 0, esize.width, 0.esize.height) +}; + +//! @} imgproc_filter + +//! @addtogroup imgproc_transform +//! @{ + +//! interpolation algorithm +enum InterpolationFlags{ + /** nearest neighbor interpolation */ + INTER_NEAREST = 0, + /** bilinear interpolation */ + INTER_LINEAR = 1, + /** bicubic interpolation */ + INTER_CUBIC = 2, + /** resampling using pixel area relation. It may be a preferred method for image decimation, as + it gives moire'-free results. But when the image is zoomed, it is similar to the INTER_NEAREST + method. */ + INTER_AREA = 3, + /** Lanczos interpolation over 8x8 neighborhood */ + INTER_LANCZOS4 = 4, + /** Bit exact bilinear interpolation */ + INTER_LINEAR_EXACT = 5, + /** Bit exact nearest neighbor interpolation. This will produce same results as + the nearest neighbor method in PIL, scikit-image or Matlab. */ + INTER_NEAREST_EXACT = 6, + /** mask for interpolation codes */ + INTER_MAX = 7, + /** flag, fills all of the destination image pixels. If some of them correspond to outliers in the + source image, they are set to zero */ + WARP_FILL_OUTLIERS = 8, + /** flag, inverse transformation + + For example, #linearPolar or #logPolar transforms: + - flag is __not__ set: \f$dst( \rho , \phi ) = src(x,y)\f$ + - flag is set: \f$dst(x,y) = src( \rho , \phi )\f$ + */ + WARP_INVERSE_MAP = 16 +}; + +/** \brief Specify the polar mapping mode +@sa warpPolar +*/ +enum WarpPolarMode +{ + WARP_POLAR_LINEAR = 0, ///< Remaps an image to/from polar space. + WARP_POLAR_LOG = 256 ///< Remaps an image to/from semilog-polar space. +}; + +enum InterpolationMasks { + INTER_BITS = 5, + INTER_BITS2 = INTER_BITS * 2, + INTER_TAB_SIZE = 1 << INTER_BITS, + INTER_TAB_SIZE2 = INTER_TAB_SIZE * INTER_TAB_SIZE + }; + +//! @} imgproc_transform + +//! @addtogroup imgproc_misc +//! @{ + +//! Distance types for Distance Transform and M-estimators +//! @see distanceTransform, fitLine +enum DistanceTypes { + DIST_USER = -1, //!< User defined distance + DIST_L1 = 1, //!< distance = |x1-x2| + |y1-y2| + DIST_L2 = 2, //!< the simple euclidean distance + DIST_C = 3, //!< distance = max(|x1-x2|,|y1-y2|) + DIST_L12 = 4, //!< L1-L2 metric: distance = 2(sqrt(1+x*x/2) - 1)) + DIST_FAIR = 5, //!< distance = c^2(|x|/c-log(1+|x|/c)), c = 1.3998 + DIST_WELSCH = 6, //!< distance = c^2/2(1-exp(-(x/c)^2)), c = 2.9846 + DIST_HUBER = 7 //!< distance = |x| \texttt{thresh}\)}{0}{otherwise}\f] + THRESH_BINARY_INV = 1, //!< \f[\texttt{dst} (x,y) = \fork{0}{if \(\texttt{src}(x,y) > \texttt{thresh}\)}{\texttt{maxval}}{otherwise}\f] + THRESH_TRUNC = 2, //!< \f[\texttt{dst} (x,y) = \fork{\texttt{threshold}}{if \(\texttt{src}(x,y) > \texttt{thresh}\)}{\texttt{src}(x,y)}{otherwise}\f] + THRESH_TOZERO = 3, //!< \f[\texttt{dst} (x,y) = \fork{\texttt{src}(x,y)}{if \(\texttt{src}(x,y) > \texttt{thresh}\)}{0}{otherwise}\f] + THRESH_TOZERO_INV = 4, //!< \f[\texttt{dst} (x,y) = \fork{0}{if \(\texttt{src}(x,y) > \texttt{thresh}\)}{\texttt{src}(x,y)}{otherwise}\f] + THRESH_MASK = 7, + THRESH_OTSU = 8, //!< flag, use Otsu algorithm to choose the optimal threshold value + THRESH_TRIANGLE = 16 //!< flag, use Triangle algorithm to choose the optimal threshold value +}; + +//! adaptive threshold algorithm +//! @see adaptiveThreshold +enum AdaptiveThresholdTypes { + /** the threshold value \f$T(x,y)\f$ is a mean of the \f$\texttt{blockSize} \times + \texttt{blockSize}\f$ neighborhood of \f$(x, y)\f$ minus C */ + ADAPTIVE_THRESH_MEAN_C = 0, + /** the threshold value \f$T(x, y)\f$ is a weighted sum (cross-correlation with a Gaussian + window) of the \f$\texttt{blockSize} \times \texttt{blockSize}\f$ neighborhood of \f$(x, y)\f$ + minus C . The default sigma (standard deviation) is used for the specified blockSize . See + #getGaussianKernel*/ + ADAPTIVE_THRESH_GAUSSIAN_C = 1 +}; + +//! class of the pixel in GrabCut algorithm +enum GrabCutClasses { + GC_BGD = 0, //!< an obvious background pixels + GC_FGD = 1, //!< an obvious foreground (object) pixel + GC_PR_BGD = 2, //!< a possible background pixel + GC_PR_FGD = 3 //!< a possible foreground pixel +}; + +//! GrabCut algorithm flags +enum GrabCutModes { + /** The function initializes the state and the mask using the provided rectangle. After that it + runs iterCount iterations of the algorithm. */ + GC_INIT_WITH_RECT = 0, + /** The function initializes the state using the provided mask. Note that GC_INIT_WITH_RECT + and GC_INIT_WITH_MASK can be combined. Then, all the pixels outside of the ROI are + automatically initialized with GC_BGD .*/ + GC_INIT_WITH_MASK = 1, + /** The value means that the algorithm should just resume. */ + GC_EVAL = 2, + /** The value means that the algorithm should just run the grabCut algorithm (a single iteration) with the fixed model */ + GC_EVAL_FREEZE_MODEL = 3 +}; + +//! distanceTransform algorithm flags +enum DistanceTransformLabelTypes { + /** each connected component of zeros in src (as well as all the non-zero pixels closest to the + connected component) will be assigned the same label */ + DIST_LABEL_CCOMP = 0, + /** each zero pixel (and all the non-zero pixels closest to it) gets its own label. */ + DIST_LABEL_PIXEL = 1 +}; + +//! floodfill algorithm flags +enum FloodFillFlags { + /** If set, the difference between the current pixel and seed pixel is considered. Otherwise, + the difference between neighbor pixels is considered (that is, the range is floating). */ + FLOODFILL_FIXED_RANGE = 1 << 16, + /** If set, the function does not change the image ( newVal is ignored), and only fills the + mask with the value specified in bits 8-16 of flags as described above. This option only make + sense in function variants that have the mask parameter. */ + FLOODFILL_MASK_ONLY = 1 << 17 +}; + +//! @} imgproc_misc + +//! @addtogroup imgproc_shape +//! @{ + +//! connected components statistics +enum ConnectedComponentsTypes { + CC_STAT_LEFT = 0, //!< The leftmost (x) coordinate which is the inclusive start of the bounding + //!< box in the horizontal direction. + CC_STAT_TOP = 1, //!< The topmost (y) coordinate which is the inclusive start of the bounding + //!< box in the vertical direction. + CC_STAT_WIDTH = 2, //!< The horizontal size of the bounding box + CC_STAT_HEIGHT = 3, //!< The vertical size of the bounding box + CC_STAT_AREA = 4, //!< The total area (in pixels) of the connected component +#ifndef CV_DOXYGEN + CC_STAT_MAX = 5 //!< Max enumeration value. Used internally only for memory allocation +#endif +}; + +//! connected components algorithm +enum ConnectedComponentsAlgorithmsTypes { + CCL_WU = 0, //!< SAUF @cite Wu2009 algorithm for 8-way connectivity, SAUF algorithm for 4-way connectivity + CCL_DEFAULT = -1, //!< BBDT algorithm for 8-way connectivity, SAUF algorithm for 4-way connectivity + CCL_GRANA = 1 //!< BBDT algorithm for 8-way connectivity, SAUF algorithm for 4-way connectivity +}; + +//! mode of the contour retrieval algorithm +enum RetrievalModes { + /** retrieves only the extreme outer contours. It sets `hierarchy[i][2]=hierarchy[i][3]=-1` for + all the contours. */ + RETR_EXTERNAL = 0, + /** retrieves all of the contours without establishing any hierarchical relationships. */ + RETR_LIST = 1, + /** retrieves all of the contours and organizes them into a two-level hierarchy. At the top + level, there are external boundaries of the components. At the second level, there are + boundaries of the holes. If there is another contour inside a hole of a connected component, it + is still put at the top level. */ + RETR_CCOMP = 2, + /** retrieves all of the contours and reconstructs a full hierarchy of nested contours.*/ + RETR_TREE = 3, + RETR_FLOODFILL = 4 //!< +}; + +//! the contour approximation algorithm +enum ContourApproximationModes { + /** stores absolutely all the contour points. That is, any 2 subsequent points (x1,y1) and + (x2,y2) of the contour will be either horizontal, vertical or diagonal neighbors, that is, + max(abs(x1-x2),abs(y2-y1))==1. */ + CHAIN_APPROX_NONE = 1, + /** compresses horizontal, vertical, and diagonal segments and leaves only their end points. + For example, an up-right rectangular contour is encoded with 4 points. */ + CHAIN_APPROX_SIMPLE = 2, + /** applies one of the flavors of the Teh-Chin chain approximation algorithm @cite TehChin89 */ + CHAIN_APPROX_TC89_L1 = 3, + /** applies one of the flavors of the Teh-Chin chain approximation algorithm @cite TehChin89 */ + CHAIN_APPROX_TC89_KCOS = 4 +}; + +/** @brief Shape matching methods + +\f$A\f$ denotes object1,\f$B\f$ denotes object2 + +\f$\begin{array}{l} m^A_i = \mathrm{sign} (h^A_i) \cdot \log{h^A_i} \\ m^B_i = \mathrm{sign} (h^B_i) \cdot \log{h^B_i} \end{array}\f$ + +and \f$h^A_i, h^B_i\f$ are the Hu moments of \f$A\f$ and \f$B\f$ , respectively. +*/ +enum ShapeMatchModes { + CONTOURS_MATCH_I1 =1, //!< \f[I_1(A,B) = \sum _{i=1...7} \left | \frac{1}{m^A_i} - \frac{1}{m^B_i} \right |\f] + CONTOURS_MATCH_I2 =2, //!< \f[I_2(A,B) = \sum _{i=1...7} \left | m^A_i - m^B_i \right |\f] + CONTOURS_MATCH_I3 =3 //!< \f[I_3(A,B) = \max _{i=1...7} \frac{ \left| m^A_i - m^B_i \right| }{ \left| m^A_i \right| }\f] +}; + +//! @} imgproc_shape + +//! @addtogroup imgproc_feature +//! @{ + +//! Variants of a Hough transform +enum HoughModes { + + /** classical or standard Hough transform. Every line is represented by two floating-point + numbers \f$(\rho, \theta)\f$ , where \f$\rho\f$ is a distance between (0,0) point and the line, + and \f$\theta\f$ is the angle between x-axis and the normal to the line. Thus, the matrix must + be (the created sequence will be) of CV_32FC2 type */ + HOUGH_STANDARD = 0, + /** probabilistic Hough transform (more efficient in case if the picture contains a few long + linear segments). It returns line segments rather than the whole line. Each segment is + represented by starting and ending points, and the matrix must be (the created sequence will + be) of the CV_32SC4 type. */ + HOUGH_PROBABILISTIC = 1, + /** multi-scale variant of the classical Hough transform. The lines are encoded the same way as + HOUGH_STANDARD. */ + HOUGH_MULTI_SCALE = 2, + HOUGH_GRADIENT = 3, //!< basically *21HT*, described in @cite Yuen90 + HOUGH_GRADIENT_ALT = 4, //!< variation of HOUGH_GRADIENT to get better accuracy +}; + +//! Variants of Line Segment %Detector +enum LineSegmentDetectorModes { + LSD_REFINE_NONE = 0, //!< No refinement applied + LSD_REFINE_STD = 1, //!< Standard refinement is applied. E.g. breaking arches into smaller straighter line approximations. + LSD_REFINE_ADV = 2 //!< Advanced refinement. Number of false alarms is calculated, lines are + //!< refined through increase of precision, decrement in size, etc. +}; + +//! @} imgproc_feature + +/** Histogram comparison methods + @ingroup imgproc_hist +*/ +enum HistCompMethods { + /** Correlation + \f[d(H_1,H_2) = \frac{\sum_I (H_1(I) - \bar{H_1}) (H_2(I) - \bar{H_2})}{\sqrt{\sum_I(H_1(I) - \bar{H_1})^2 \sum_I(H_2(I) - \bar{H_2})^2}}\f] + where + \f[\bar{H_k} = \frac{1}{N} \sum _J H_k(J)\f] + and \f$N\f$ is a total number of histogram bins. */ + HISTCMP_CORREL = 0, + /** Chi-Square + \f[d(H_1,H_2) = \sum _I \frac{\left(H_1(I)-H_2(I)\right)^2}{H_1(I)}\f] */ + HISTCMP_CHISQR = 1, + /** Intersection + \f[d(H_1,H_2) = \sum _I \min (H_1(I), H_2(I))\f] */ + HISTCMP_INTERSECT = 2, + /** Bhattacharyya distance + (In fact, OpenCV computes Hellinger distance, which is related to Bhattacharyya coefficient.) + \f[d(H_1,H_2) = \sqrt{1 - \frac{1}{\sqrt{\bar{H_1} \bar{H_2} N^2}} \sum_I \sqrt{H_1(I) \cdot H_2(I)}}\f] */ + HISTCMP_BHATTACHARYYA = 3, + HISTCMP_HELLINGER = HISTCMP_BHATTACHARYYA, //!< Synonym for HISTCMP_BHATTACHARYYA + /** Alternative Chi-Square + \f[d(H_1,H_2) = 2 * \sum _I \frac{\left(H_1(I)-H_2(I)\right)^2}{H_1(I)+H_2(I)}\f] + This alternative formula is regularly used for texture comparison. See e.g. @cite Puzicha1997 */ + HISTCMP_CHISQR_ALT = 4, + /** Kullback-Leibler divergence + \f[d(H_1,H_2) = \sum _I H_1(I) \log \left(\frac{H_1(I)}{H_2(I)}\right)\f] */ + HISTCMP_KL_DIV = 5 +}; + +/** the color conversion codes +@see @ref imgproc_color_conversions +@ingroup imgproc_color_conversions + */ +enum ColorConversionCodes { + COLOR_BGR2BGRA = 0, //!< add alpha channel to RGB or BGR image + COLOR_RGB2RGBA = COLOR_BGR2BGRA, + + COLOR_BGRA2BGR = 1, //!< remove alpha channel from RGB or BGR image + COLOR_RGBA2RGB = COLOR_BGRA2BGR, + + COLOR_BGR2RGBA = 2, //!< convert between RGB and BGR color spaces (with or without alpha channel) + COLOR_RGB2BGRA = COLOR_BGR2RGBA, + + COLOR_RGBA2BGR = 3, + COLOR_BGRA2RGB = COLOR_RGBA2BGR, + + COLOR_BGR2RGB = 4, + COLOR_RGB2BGR = COLOR_BGR2RGB, + + COLOR_BGRA2RGBA = 5, + COLOR_RGBA2BGRA = COLOR_BGRA2RGBA, + + COLOR_BGR2GRAY = 6, //!< convert between RGB/BGR and grayscale, @ref color_convert_rgb_gray "color conversions" + COLOR_RGB2GRAY = 7, + COLOR_GRAY2BGR = 8, + COLOR_GRAY2RGB = COLOR_GRAY2BGR, + COLOR_GRAY2BGRA = 9, + COLOR_GRAY2RGBA = COLOR_GRAY2BGRA, + COLOR_BGRA2GRAY = 10, + COLOR_RGBA2GRAY = 11, + + COLOR_BGR2BGR565 = 12, //!< convert between RGB/BGR and BGR565 (16-bit images) + COLOR_RGB2BGR565 = 13, + COLOR_BGR5652BGR = 14, + COLOR_BGR5652RGB = 15, + COLOR_BGRA2BGR565 = 16, + COLOR_RGBA2BGR565 = 17, + COLOR_BGR5652BGRA = 18, + COLOR_BGR5652RGBA = 19, + + COLOR_GRAY2BGR565 = 20, //!< convert between grayscale to BGR565 (16-bit images) + COLOR_BGR5652GRAY = 21, + + COLOR_BGR2BGR555 = 22, //!< convert between RGB/BGR and BGR555 (16-bit images) + COLOR_RGB2BGR555 = 23, + COLOR_BGR5552BGR = 24, + COLOR_BGR5552RGB = 25, + COLOR_BGRA2BGR555 = 26, + COLOR_RGBA2BGR555 = 27, + COLOR_BGR5552BGRA = 28, + COLOR_BGR5552RGBA = 29, + + COLOR_GRAY2BGR555 = 30, //!< convert between grayscale and BGR555 (16-bit images) + COLOR_BGR5552GRAY = 31, + + COLOR_BGR2XYZ = 32, //!< convert RGB/BGR to CIE XYZ, @ref color_convert_rgb_xyz "color conversions" + COLOR_RGB2XYZ = 33, + COLOR_XYZ2BGR = 34, + COLOR_XYZ2RGB = 35, + + COLOR_BGR2YCrCb = 36, //!< convert RGB/BGR to luma-chroma (aka YCC), @ref color_convert_rgb_ycrcb "color conversions" + COLOR_RGB2YCrCb = 37, + COLOR_YCrCb2BGR = 38, + COLOR_YCrCb2RGB = 39, + + COLOR_BGR2HSV = 40, //!< convert RGB/BGR to HSV (hue saturation value), @ref color_convert_rgb_hsv "color conversions" + COLOR_RGB2HSV = 41, + + COLOR_BGR2Lab = 44, //!< convert RGB/BGR to CIE Lab, @ref color_convert_rgb_lab "color conversions" + COLOR_RGB2Lab = 45, + + COLOR_BGR2Luv = 50, //!< convert RGB/BGR to CIE Luv, @ref color_convert_rgb_luv "color conversions" + COLOR_RGB2Luv = 51, + COLOR_BGR2HLS = 52, //!< convert RGB/BGR to HLS (hue lightness saturation), @ref color_convert_rgb_hls "color conversions" + COLOR_RGB2HLS = 53, + + COLOR_HSV2BGR = 54, //!< backward conversions to RGB/BGR + COLOR_HSV2RGB = 55, + + COLOR_Lab2BGR = 56, + COLOR_Lab2RGB = 57, + COLOR_Luv2BGR = 58, + COLOR_Luv2RGB = 59, + COLOR_HLS2BGR = 60, + COLOR_HLS2RGB = 61, + + COLOR_BGR2HSV_FULL = 66, + COLOR_RGB2HSV_FULL = 67, + COLOR_BGR2HLS_FULL = 68, + COLOR_RGB2HLS_FULL = 69, + + COLOR_HSV2BGR_FULL = 70, + COLOR_HSV2RGB_FULL = 71, + COLOR_HLS2BGR_FULL = 72, + COLOR_HLS2RGB_FULL = 73, + + COLOR_LBGR2Lab = 74, + COLOR_LRGB2Lab = 75, + COLOR_LBGR2Luv = 76, + COLOR_LRGB2Luv = 77, + + COLOR_Lab2LBGR = 78, + COLOR_Lab2LRGB = 79, + COLOR_Luv2LBGR = 80, + COLOR_Luv2LRGB = 81, + + COLOR_BGR2YUV = 82, //!< convert between RGB/BGR and YUV + COLOR_RGB2YUV = 83, + COLOR_YUV2BGR = 84, + COLOR_YUV2RGB = 85, + + //! YUV 4:2:0 family to RGB + COLOR_YUV2RGB_NV12 = 90, + COLOR_YUV2BGR_NV12 = 91, + COLOR_YUV2RGB_NV21 = 92, + COLOR_YUV2BGR_NV21 = 93, + COLOR_YUV420sp2RGB = COLOR_YUV2RGB_NV21, + COLOR_YUV420sp2BGR = COLOR_YUV2BGR_NV21, + + COLOR_YUV2RGBA_NV12 = 94, + COLOR_YUV2BGRA_NV12 = 95, + COLOR_YUV2RGBA_NV21 = 96, + COLOR_YUV2BGRA_NV21 = 97, + COLOR_YUV420sp2RGBA = COLOR_YUV2RGBA_NV21, + COLOR_YUV420sp2BGRA = COLOR_YUV2BGRA_NV21, + + COLOR_YUV2RGB_YV12 = 98, + COLOR_YUV2BGR_YV12 = 99, + COLOR_YUV2RGB_IYUV = 100, + COLOR_YUV2BGR_IYUV = 101, + COLOR_YUV2RGB_I420 = COLOR_YUV2RGB_IYUV, + COLOR_YUV2BGR_I420 = COLOR_YUV2BGR_IYUV, + COLOR_YUV420p2RGB = COLOR_YUV2RGB_YV12, + COLOR_YUV420p2BGR = COLOR_YUV2BGR_YV12, + + COLOR_YUV2RGBA_YV12 = 102, + COLOR_YUV2BGRA_YV12 = 103, + COLOR_YUV2RGBA_IYUV = 104, + COLOR_YUV2BGRA_IYUV = 105, + COLOR_YUV2RGBA_I420 = COLOR_YUV2RGBA_IYUV, + COLOR_YUV2BGRA_I420 = COLOR_YUV2BGRA_IYUV, + COLOR_YUV420p2RGBA = COLOR_YUV2RGBA_YV12, + COLOR_YUV420p2BGRA = COLOR_YUV2BGRA_YV12, + + COLOR_YUV2GRAY_420 = 106, + COLOR_YUV2GRAY_NV21 = COLOR_YUV2GRAY_420, + COLOR_YUV2GRAY_NV12 = COLOR_YUV2GRAY_420, + COLOR_YUV2GRAY_YV12 = COLOR_YUV2GRAY_420, + COLOR_YUV2GRAY_IYUV = COLOR_YUV2GRAY_420, + COLOR_YUV2GRAY_I420 = COLOR_YUV2GRAY_420, + COLOR_YUV420sp2GRAY = COLOR_YUV2GRAY_420, + COLOR_YUV420p2GRAY = COLOR_YUV2GRAY_420, + + //! YUV 4:2:2 family to RGB + COLOR_YUV2RGB_UYVY = 107, + COLOR_YUV2BGR_UYVY = 108, + //COLOR_YUV2RGB_VYUY = 109, + //COLOR_YUV2BGR_VYUY = 110, + COLOR_YUV2RGB_Y422 = COLOR_YUV2RGB_UYVY, + COLOR_YUV2BGR_Y422 = COLOR_YUV2BGR_UYVY, + COLOR_YUV2RGB_UYNV = COLOR_YUV2RGB_UYVY, + COLOR_YUV2BGR_UYNV = COLOR_YUV2BGR_UYVY, + + COLOR_YUV2RGBA_UYVY = 111, + COLOR_YUV2BGRA_UYVY = 112, + //COLOR_YUV2RGBA_VYUY = 113, + //COLOR_YUV2BGRA_VYUY = 114, + COLOR_YUV2RGBA_Y422 = COLOR_YUV2RGBA_UYVY, + COLOR_YUV2BGRA_Y422 = COLOR_YUV2BGRA_UYVY, + COLOR_YUV2RGBA_UYNV = COLOR_YUV2RGBA_UYVY, + COLOR_YUV2BGRA_UYNV = COLOR_YUV2BGRA_UYVY, + + COLOR_YUV2RGB_YUY2 = 115, + COLOR_YUV2BGR_YUY2 = 116, + COLOR_YUV2RGB_YVYU = 117, + COLOR_YUV2BGR_YVYU = 118, + COLOR_YUV2RGB_YUYV = COLOR_YUV2RGB_YUY2, + COLOR_YUV2BGR_YUYV = COLOR_YUV2BGR_YUY2, + COLOR_YUV2RGB_YUNV = COLOR_YUV2RGB_YUY2, + COLOR_YUV2BGR_YUNV = COLOR_YUV2BGR_YUY2, + + COLOR_YUV2RGBA_YUY2 = 119, + COLOR_YUV2BGRA_YUY2 = 120, + COLOR_YUV2RGBA_YVYU = 121, + COLOR_YUV2BGRA_YVYU = 122, + COLOR_YUV2RGBA_YUYV = COLOR_YUV2RGBA_YUY2, + COLOR_YUV2BGRA_YUYV = COLOR_YUV2BGRA_YUY2, + COLOR_YUV2RGBA_YUNV = COLOR_YUV2RGBA_YUY2, + COLOR_YUV2BGRA_YUNV = COLOR_YUV2BGRA_YUY2, + + COLOR_YUV2GRAY_UYVY = 123, + COLOR_YUV2GRAY_YUY2 = 124, + //CV_YUV2GRAY_VYUY = CV_YUV2GRAY_UYVY, + COLOR_YUV2GRAY_Y422 = COLOR_YUV2GRAY_UYVY, + COLOR_YUV2GRAY_UYNV = COLOR_YUV2GRAY_UYVY, + COLOR_YUV2GRAY_YVYU = COLOR_YUV2GRAY_YUY2, + COLOR_YUV2GRAY_YUYV = COLOR_YUV2GRAY_YUY2, + COLOR_YUV2GRAY_YUNV = COLOR_YUV2GRAY_YUY2, + + //! alpha premultiplication + COLOR_RGBA2mRGBA = 125, + COLOR_mRGBA2RGBA = 126, + + //! RGB to YUV 4:2:0 family + COLOR_RGB2YUV_I420 = 127, + COLOR_BGR2YUV_I420 = 128, + COLOR_RGB2YUV_IYUV = COLOR_RGB2YUV_I420, + COLOR_BGR2YUV_IYUV = COLOR_BGR2YUV_I420, + + COLOR_RGBA2YUV_I420 = 129, + COLOR_BGRA2YUV_I420 = 130, + COLOR_RGBA2YUV_IYUV = COLOR_RGBA2YUV_I420, + COLOR_BGRA2YUV_IYUV = COLOR_BGRA2YUV_I420, + COLOR_RGB2YUV_YV12 = 131, + COLOR_BGR2YUV_YV12 = 132, + COLOR_RGBA2YUV_YV12 = 133, + COLOR_BGRA2YUV_YV12 = 134, + + //! Demosaicing + COLOR_BayerBG2BGR = 46, + COLOR_BayerGB2BGR = 47, + COLOR_BayerRG2BGR = 48, + COLOR_BayerGR2BGR = 49, + + COLOR_BayerBG2RGB = COLOR_BayerRG2BGR, + COLOR_BayerGB2RGB = COLOR_BayerGR2BGR, + COLOR_BayerRG2RGB = COLOR_BayerBG2BGR, + COLOR_BayerGR2RGB = COLOR_BayerGB2BGR, + + COLOR_BayerBG2GRAY = 86, + COLOR_BayerGB2GRAY = 87, + COLOR_BayerRG2GRAY = 88, + COLOR_BayerGR2GRAY = 89, + + //! Demosaicing using Variable Number of Gradients + COLOR_BayerBG2BGR_VNG = 62, + COLOR_BayerGB2BGR_VNG = 63, + COLOR_BayerRG2BGR_VNG = 64, + COLOR_BayerGR2BGR_VNG = 65, + + COLOR_BayerBG2RGB_VNG = COLOR_BayerRG2BGR_VNG, + COLOR_BayerGB2RGB_VNG = COLOR_BayerGR2BGR_VNG, + COLOR_BayerRG2RGB_VNG = COLOR_BayerBG2BGR_VNG, + COLOR_BayerGR2RGB_VNG = COLOR_BayerGB2BGR_VNG, + + //! Edge-Aware Demosaicing + COLOR_BayerBG2BGR_EA = 135, + COLOR_BayerGB2BGR_EA = 136, + COLOR_BayerRG2BGR_EA = 137, + COLOR_BayerGR2BGR_EA = 138, + + COLOR_BayerBG2RGB_EA = COLOR_BayerRG2BGR_EA, + COLOR_BayerGB2RGB_EA = COLOR_BayerGR2BGR_EA, + COLOR_BayerRG2RGB_EA = COLOR_BayerBG2BGR_EA, + COLOR_BayerGR2RGB_EA = COLOR_BayerGB2BGR_EA, + + //! Demosaicing with alpha channel + COLOR_BayerBG2BGRA = 139, + COLOR_BayerGB2BGRA = 140, + COLOR_BayerRG2BGRA = 141, + COLOR_BayerGR2BGRA = 142, + + COLOR_BayerBG2RGBA = COLOR_BayerRG2BGRA, + COLOR_BayerGB2RGBA = COLOR_BayerGR2BGRA, + COLOR_BayerRG2RGBA = COLOR_BayerBG2BGRA, + COLOR_BayerGR2RGBA = COLOR_BayerGB2BGRA, + + COLOR_COLORCVT_MAX = 143 +}; + +//! @addtogroup imgproc_shape +//! @{ + +//! types of intersection between rectangles +enum RectanglesIntersectTypes { + INTERSECT_NONE = 0, //!< No intersection + INTERSECT_PARTIAL = 1, //!< There is a partial intersection + INTERSECT_FULL = 2 //!< One of the rectangle is fully enclosed in the other +}; + +/** types of line +@ingroup imgproc_draw +*/ +enum LineTypes { + FILLED = -1, + LINE_4 = 4, //!< 4-connected line + LINE_8 = 8, //!< 8-connected line + LINE_AA = 16 //!< antialiased line +}; + +/** Only a subset of Hershey fonts are supported +@ingroup imgproc_draw +*/ +enum HersheyFonts { + FONT_HERSHEY_SIMPLEX = 0, //!< normal size sans-serif font + FONT_HERSHEY_PLAIN = 1, //!< small size sans-serif font + FONT_HERSHEY_DUPLEX = 2, //!< normal size sans-serif font (more complex than FONT_HERSHEY_SIMPLEX) + FONT_HERSHEY_COMPLEX = 3, //!< normal size serif font + FONT_HERSHEY_TRIPLEX = 4, //!< normal size serif font (more complex than FONT_HERSHEY_COMPLEX) + FONT_HERSHEY_COMPLEX_SMALL = 5, //!< smaller version of FONT_HERSHEY_COMPLEX + FONT_HERSHEY_SCRIPT_SIMPLEX = 6, //!< hand-writing style font + FONT_HERSHEY_SCRIPT_COMPLEX = 7, //!< more complex variant of FONT_HERSHEY_SCRIPT_SIMPLEX + FONT_ITALIC = 16 //!< flag for italic font +}; + +/** Possible set of marker types used for the cv::drawMarker function +@ingroup imgproc_draw +*/ +enum MarkerTypes +{ + MARKER_CROSS = 0, //!< A crosshair marker shape + MARKER_TILTED_CROSS = 1, //!< A 45 degree tilted crosshair marker shape + MARKER_STAR = 2, //!< A star marker shape, combination of cross and tilted cross + MARKER_DIAMOND = 3, //!< A diamond marker shape + MARKER_SQUARE = 4, //!< A square marker shape + MARKER_TRIANGLE_UP = 5, //!< An upwards pointing triangle marker shape + MARKER_TRIANGLE_DOWN = 6 //!< A downwards pointing triangle marker shape +}; + +/** @brief finds arbitrary template in the grayscale image using Generalized Hough Transform +*/ +class CV_EXPORTS_W GeneralizedHough : public Algorithm +{ +public: + //! set template to search + CV_WRAP virtual void setTemplate(InputArray templ, Point templCenter = Point(-1, -1)) = 0; + CV_WRAP virtual void setTemplate(InputArray edges, InputArray dx, InputArray dy, Point templCenter = Point(-1, -1)) = 0; + + //! find template on image + CV_WRAP virtual void detect(InputArray image, OutputArray positions, OutputArray votes = noArray()) = 0; + CV_WRAP virtual void detect(InputArray edges, InputArray dx, InputArray dy, OutputArray positions, OutputArray votes = noArray()) = 0; + + //! Canny low threshold. + CV_WRAP virtual void setCannyLowThresh(int cannyLowThresh) = 0; + CV_WRAP virtual int getCannyLowThresh() const = 0; + + //! Canny high threshold. + CV_WRAP virtual void setCannyHighThresh(int cannyHighThresh) = 0; + CV_WRAP virtual int getCannyHighThresh() const = 0; + + //! Minimum distance between the centers of the detected objects. + CV_WRAP virtual void setMinDist(double minDist) = 0; + CV_WRAP virtual double getMinDist() const = 0; + + //! Inverse ratio of the accumulator resolution to the image resolution. + CV_WRAP virtual void setDp(double dp) = 0; + CV_WRAP virtual double getDp() const = 0; + + //! Maximal size of inner buffers. + CV_WRAP virtual void setMaxBufferSize(int maxBufferSize) = 0; + CV_WRAP virtual int getMaxBufferSize() const = 0; +}; + +/** @brief finds arbitrary template in the grayscale image using Generalized Hough Transform + +Detects position only without translation and rotation @cite Ballard1981 . +*/ +class CV_EXPORTS_W GeneralizedHoughBallard : public GeneralizedHough +{ +public: + //! R-Table levels. + CV_WRAP virtual void setLevels(int levels) = 0; + CV_WRAP virtual int getLevels() const = 0; + + //! The accumulator threshold for the template centers at the detection stage. The smaller it is, the more false positions may be detected. + CV_WRAP virtual void setVotesThreshold(int votesThreshold) = 0; + CV_WRAP virtual int getVotesThreshold() const = 0; +}; + +/** @brief finds arbitrary template in the grayscale image using Generalized Hough Transform + +Detects position, translation and rotation @cite Guil1999 . +*/ +class CV_EXPORTS_W GeneralizedHoughGuil : public GeneralizedHough +{ +public: + //! Angle difference in degrees between two points in feature. + CV_WRAP virtual void setXi(double xi) = 0; + CV_WRAP virtual double getXi() const = 0; + + //! Feature table levels. + CV_WRAP virtual void setLevels(int levels) = 0; + CV_WRAP virtual int getLevels() const = 0; + + //! Maximal difference between angles that treated as equal. + CV_WRAP virtual void setAngleEpsilon(double angleEpsilon) = 0; + CV_WRAP virtual double getAngleEpsilon() const = 0; + + //! Minimal rotation angle to detect in degrees. + CV_WRAP virtual void setMinAngle(double minAngle) = 0; + CV_WRAP virtual double getMinAngle() const = 0; + + //! Maximal rotation angle to detect in degrees. + CV_WRAP virtual void setMaxAngle(double maxAngle) = 0; + CV_WRAP virtual double getMaxAngle() const = 0; + + //! Angle step in degrees. + CV_WRAP virtual void setAngleStep(double angleStep) = 0; + CV_WRAP virtual double getAngleStep() const = 0; + + //! Angle votes threshold. + CV_WRAP virtual void setAngleThresh(int angleThresh) = 0; + CV_WRAP virtual int getAngleThresh() const = 0; + + //! Minimal scale to detect. + CV_WRAP virtual void setMinScale(double minScale) = 0; + CV_WRAP virtual double getMinScale() const = 0; + + //! Maximal scale to detect. + CV_WRAP virtual void setMaxScale(double maxScale) = 0; + CV_WRAP virtual double getMaxScale() const = 0; + + //! Scale step. + CV_WRAP virtual void setScaleStep(double scaleStep) = 0; + CV_WRAP virtual double getScaleStep() const = 0; + + //! Scale votes threshold. + CV_WRAP virtual void setScaleThresh(int scaleThresh) = 0; + CV_WRAP virtual int getScaleThresh() const = 0; + + //! Position votes threshold. + CV_WRAP virtual void setPosThresh(int posThresh) = 0; + CV_WRAP virtual int getPosThresh() const = 0; +}; + +//! @} imgproc_shape + +//! @addtogroup imgproc_hist +//! @{ + +/** @brief Base class for Contrast Limited Adaptive Histogram Equalization. +*/ +class CV_EXPORTS_W CLAHE : public Algorithm +{ +public: + /** @brief Equalizes the histogram of a grayscale image using Contrast Limited Adaptive Histogram Equalization. + + @param src Source image of type CV_8UC1 or CV_16UC1. + @param dst Destination image. + */ + CV_WRAP virtual void apply(InputArray src, OutputArray dst) = 0; + + /** @brief Sets threshold for contrast limiting. + + @param clipLimit threshold value. + */ + CV_WRAP virtual void setClipLimit(double clipLimit) = 0; + + //! Returns threshold value for contrast limiting. + CV_WRAP virtual double getClipLimit() const = 0; + + /** @brief Sets size of grid for histogram equalization. Input image will be divided into + equally sized rectangular tiles. + + @param tileGridSize defines the number of tiles in row and column. + */ + CV_WRAP virtual void setTilesGridSize(Size tileGridSize) = 0; + + //!@brief Returns Size defines the number of tiles in row and column. + CV_WRAP virtual Size getTilesGridSize() const = 0; + + CV_WRAP virtual void collectGarbage() = 0; +}; + +//! @} imgproc_hist + +//! @addtogroup imgproc_subdiv2d +//! @{ + +class CV_EXPORTS_W Subdiv2D +{ +public: + /** Subdiv2D point location cases */ + enum { PTLOC_ERROR = -2, //!< Point location error + PTLOC_OUTSIDE_RECT = -1, //!< Point outside the subdivision bounding rect + PTLOC_INSIDE = 0, //!< Point inside some facet + PTLOC_VERTEX = 1, //!< Point coincides with one of the subdivision vertices + PTLOC_ON_EDGE = 2 //!< Point on some edge + }; + + /** Subdiv2D edge type navigation (see: getEdge()) */ + enum { NEXT_AROUND_ORG = 0x00, + NEXT_AROUND_DST = 0x22, + PREV_AROUND_ORG = 0x11, + PREV_AROUND_DST = 0x33, + NEXT_AROUND_LEFT = 0x13, + NEXT_AROUND_RIGHT = 0x31, + PREV_AROUND_LEFT = 0x20, + PREV_AROUND_RIGHT = 0x02 + }; + + /** creates an empty Subdiv2D object. + To create a new empty Delaunay subdivision you need to use the #initDelaunay function. + */ + CV_WRAP Subdiv2D(); + + /** @overload + + @param rect Rectangle that includes all of the 2D points that are to be added to the subdivision. + + The function creates an empty Delaunay subdivision where 2D points can be added using the function + insert() . All of the points to be added must be within the specified rectangle, otherwise a runtime + error is raised. + */ + CV_WRAP Subdiv2D(Rect rect); + + /** @brief Creates a new empty Delaunay subdivision + + @param rect Rectangle that includes all of the 2D points that are to be added to the subdivision. + + */ + CV_WRAP void initDelaunay(Rect rect); + + /** @brief Insert a single point into a Delaunay triangulation. + + @param pt Point to insert. + + The function inserts a single point into a subdivision and modifies the subdivision topology + appropriately. If a point with the same coordinates exists already, no new point is added. + @returns the ID of the point. + + @note If the point is outside of the triangulation specified rect a runtime error is raised. + */ + CV_WRAP int insert(Point2f pt); + + /** @brief Insert multiple points into a Delaunay triangulation. + + @param ptvec Points to insert. + + The function inserts a vector of points into a subdivision and modifies the subdivision topology + appropriately. + */ + CV_WRAP void insert(const std::vector& ptvec); + + /** @brief Returns the location of a point within a Delaunay triangulation. + + @param pt Point to locate. + @param edge Output edge that the point belongs to or is located to the right of it. + @param vertex Optional output vertex the input point coincides with. + + The function locates the input point within the subdivision and gives one of the triangle edges + or vertices. + + @returns an integer which specify one of the following five cases for point location: + - The point falls into some facet. The function returns #PTLOC_INSIDE and edge will contain one of + edges of the facet. + - The point falls onto the edge. The function returns #PTLOC_ON_EDGE and edge will contain this edge. + - The point coincides with one of the subdivision vertices. The function returns #PTLOC_VERTEX and + vertex will contain a pointer to the vertex. + - The point is outside the subdivision reference rectangle. The function returns #PTLOC_OUTSIDE_RECT + and no pointers are filled. + - One of input arguments is invalid. A runtime error is raised or, if silent or "parent" error + processing mode is selected, #PTLOC_ERROR is returned. + */ + CV_WRAP int locate(Point2f pt, CV_OUT int& edge, CV_OUT int& vertex); + + /** @brief Finds the subdivision vertex closest to the given point. + + @param pt Input point. + @param nearestPt Output subdivision vertex point. + + The function is another function that locates the input point within the subdivision. It finds the + subdivision vertex that is the closest to the input point. It is not necessarily one of vertices + of the facet containing the input point, though the facet (located using locate() ) is used as a + starting point. + + @returns vertex ID. + */ + CV_WRAP int findNearest(Point2f pt, CV_OUT Point2f* nearestPt = 0); + + /** @brief Returns a list of all edges. + + @param edgeList Output vector. + + The function gives each edge as a 4 numbers vector, where each two are one of the edge + vertices. i.e. org_x = v[0], org_y = v[1], dst_x = v[2], dst_y = v[3]. + */ + CV_WRAP void getEdgeList(CV_OUT std::vector& edgeList) const; + + /** @brief Returns a list of the leading edge ID connected to each triangle. + + @param leadingEdgeList Output vector. + + The function gives one edge ID for each triangle. + */ + CV_WRAP void getLeadingEdgeList(CV_OUT std::vector& leadingEdgeList) const; + + /** @brief Returns a list of all triangles. + + @param triangleList Output vector. + + The function gives each triangle as a 6 numbers vector, where each two are one of the triangle + vertices. i.e. p1_x = v[0], p1_y = v[1], p2_x = v[2], p2_y = v[3], p3_x = v[4], p3_y = v[5]. + */ + CV_WRAP void getTriangleList(CV_OUT std::vector& triangleList) const; + + /** @brief Returns a list of all Voronoi facets. + + @param idx Vector of vertices IDs to consider. For all vertices you can pass empty vector. + @param facetList Output vector of the Voronoi facets. + @param facetCenters Output vector of the Voronoi facets center points. + + */ + CV_WRAP void getVoronoiFacetList(const std::vector& idx, CV_OUT std::vector >& facetList, + CV_OUT std::vector& facetCenters); + + /** @brief Returns vertex location from vertex ID. + + @param vertex vertex ID. + @param firstEdge Optional. The first edge ID which is connected to the vertex. + @returns vertex (x,y) + + */ + CV_WRAP Point2f getVertex(int vertex, CV_OUT int* firstEdge = 0) const; + + /** @brief Returns one of the edges related to the given edge. + + @param edge Subdivision edge ID. + @param nextEdgeType Parameter specifying which of the related edges to return. + The following values are possible: + - NEXT_AROUND_ORG next around the edge origin ( eOnext on the picture below if e is the input edge) + - NEXT_AROUND_DST next around the edge vertex ( eDnext ) + - PREV_AROUND_ORG previous around the edge origin (reversed eRnext ) + - PREV_AROUND_DST previous around the edge destination (reversed eLnext ) + - NEXT_AROUND_LEFT next around the left facet ( eLnext ) + - NEXT_AROUND_RIGHT next around the right facet ( eRnext ) + - PREV_AROUND_LEFT previous around the left facet (reversed eOnext ) + - PREV_AROUND_RIGHT previous around the right facet (reversed eDnext ) + + ![sample output](pics/quadedge.png) + + @returns edge ID related to the input edge. + */ + CV_WRAP int getEdge( int edge, int nextEdgeType ) const; + + /** @brief Returns next edge around the edge origin. + + @param edge Subdivision edge ID. + + @returns an integer which is next edge ID around the edge origin: eOnext on the + picture above if e is the input edge). + */ + CV_WRAP int nextEdge(int edge) const; + + /** @brief Returns another edge of the same quad-edge. + + @param edge Subdivision edge ID. + @param rotate Parameter specifying which of the edges of the same quad-edge as the input + one to return. The following values are possible: + - 0 - the input edge ( e on the picture below if e is the input edge) + - 1 - the rotated edge ( eRot ) + - 2 - the reversed edge (reversed e (in green)) + - 3 - the reversed rotated edge (reversed eRot (in green)) + + @returns one of the edges ID of the same quad-edge as the input edge. + */ + CV_WRAP int rotateEdge(int edge, int rotate) const; + CV_WRAP int symEdge(int edge) const; + + /** @brief Returns the edge origin. + + @param edge Subdivision edge ID. + @param orgpt Output vertex location. + + @returns vertex ID. + */ + CV_WRAP int edgeOrg(int edge, CV_OUT Point2f* orgpt = 0) const; + + /** @brief Returns the edge destination. + + @param edge Subdivision edge ID. + @param dstpt Output vertex location. + + @returns vertex ID. + */ + CV_WRAP int edgeDst(int edge, CV_OUT Point2f* dstpt = 0) const; + +protected: + int newEdge(); + void deleteEdge(int edge); + int newPoint(Point2f pt, bool isvirtual, int firstEdge = 0); + void deletePoint(int vtx); + void setEdgePoints( int edge, int orgPt, int dstPt ); + void splice( int edgeA, int edgeB ); + int connectEdges( int edgeA, int edgeB ); + void swapEdges( int edge ); + int isRightOf(Point2f pt, int edge) const; + void calcVoronoi(); + void clearVoronoi(); + void checkSubdiv() const; + + struct CV_EXPORTS Vertex + { + Vertex(); + Vertex(Point2f pt, bool _isvirtual, int _firstEdge=0); + bool isvirtual() const; + bool isfree() const; + + int firstEdge; + int type; + Point2f pt; + }; + + struct CV_EXPORTS QuadEdge + { + QuadEdge(); + QuadEdge(int edgeidx); + bool isfree() const; + + int next[4]; + int pt[4]; + }; + + //! All of the vertices + std::vector vtx; + //! All of the edges + std::vector qedges; + int freeQEdge; + int freePoint; + bool validGeometry; + + int recentEdge; + //! Top left corner of the bounding rect + Point2f topLeft; + //! Bottom right corner of the bounding rect + Point2f bottomRight; +}; + +//! @} imgproc_subdiv2d + +//! @addtogroup imgproc_feature +//! @{ + +/** @brief Line segment detector class + +following the algorithm described at @cite Rafael12 . + +@note Implementation has been removed due original code license conflict + +*/ +class CV_EXPORTS_W LineSegmentDetector : public Algorithm +{ +public: + + /** @brief Finds lines in the input image. + + This is the output of the default parameters of the algorithm on the above shown image. + + ![image](pics/building_lsd.png) + + @param _image A grayscale (CV_8UC1) input image. If only a roi needs to be selected, use: + `lsd_ptr-\>detect(image(roi), lines, ...); lines += Scalar(roi.x, roi.y, roi.x, roi.y);` + @param _lines A vector of Vec4i or Vec4f elements specifying the beginning and ending point of a line. Where + Vec4i/Vec4f is (x1, y1, x2, y2), point 1 is the start, point 2 - end. Returned lines are strictly + oriented depending on the gradient. + @param width Vector of widths of the regions, where the lines are found. E.g. Width of line. + @param prec Vector of precisions with which the lines are found. + @param nfa Vector containing number of false alarms in the line region, with precision of 10%. The + bigger the value, logarithmically better the detection. + - -1 corresponds to 10 mean false alarms + - 0 corresponds to 1 mean false alarm + - 1 corresponds to 0.1 mean false alarms + This vector will be calculated only when the objects type is #LSD_REFINE_ADV. + */ + CV_WRAP virtual void detect(InputArray _image, OutputArray _lines, + OutputArray width = noArray(), OutputArray prec = noArray(), + OutputArray nfa = noArray()) = 0; + + /** @brief Draws the line segments on a given image. + @param _image The image, where the lines will be drawn. Should be bigger or equal to the image, + where the lines were found. + @param lines A vector of the lines that needed to be drawn. + */ + CV_WRAP virtual void drawSegments(InputOutputArray _image, InputArray lines) = 0; + + /** @brief Draws two groups of lines in blue and red, counting the non overlapping (mismatching) pixels. + + @param size The size of the image, where lines1 and lines2 were found. + @param lines1 The first group of lines that needs to be drawn. It is visualized in blue color. + @param lines2 The second group of lines. They visualized in red color. + @param _image Optional image, where the lines will be drawn. The image should be color(3-channel) + in order for lines1 and lines2 to be drawn in the above mentioned colors. + */ + CV_WRAP virtual int compareSegments(const Size& size, InputArray lines1, InputArray lines2, InputOutputArray _image = noArray()) = 0; + + virtual ~LineSegmentDetector() { } +}; + +/** @brief Creates a smart pointer to a LineSegmentDetector object and initializes it. + +The LineSegmentDetector algorithm is defined using the standard values. Only advanced users may want +to edit those, as to tailor it for their own application. + +@param _refine The way found lines will be refined, see #LineSegmentDetectorModes +@param _scale The scale of the image that will be used to find the lines. Range (0..1]. +@param _sigma_scale Sigma for Gaussian filter. It is computed as sigma = _sigma_scale/_scale. +@param _quant Bound to the quantization error on the gradient norm. +@param _ang_th Gradient angle tolerance in degrees. +@param _log_eps Detection threshold: -log10(NFA) \> log_eps. Used only when advance refinement +is chosen. +@param _density_th Minimal density of aligned region points in the enclosing rectangle. +@param _n_bins Number of bins in pseudo-ordering of gradient modulus. + +@note Implementation has been removed due original code license conflict + */ +CV_EXPORTS_W Ptr createLineSegmentDetector( + int _refine = LSD_REFINE_STD, double _scale = 0.8, + double _sigma_scale = 0.6, double _quant = 2.0, double _ang_th = 22.5, + double _log_eps = 0, double _density_th = 0.7, int _n_bins = 1024); + +//! @} imgproc_feature + +//! @addtogroup imgproc_filter +//! @{ + +/** @brief Returns Gaussian filter coefficients. + +The function computes and returns the \f$\texttt{ksize} \times 1\f$ matrix of Gaussian filter +coefficients: + +\f[G_i= \alpha *e^{-(i-( \texttt{ksize} -1)/2)^2/(2* \texttt{sigma}^2)},\f] + +where \f$i=0..\texttt{ksize}-1\f$ and \f$\alpha\f$ is the scale factor chosen so that \f$\sum_i G_i=1\f$. + +Two of such generated kernels can be passed to sepFilter2D. Those functions automatically recognize +smoothing kernels (a symmetrical kernel with sum of weights equal to 1) and handle them accordingly. +You may also use the higher-level GaussianBlur. +@param ksize Aperture size. It should be odd ( \f$\texttt{ksize} \mod 2 = 1\f$ ) and positive. +@param sigma Gaussian standard deviation. If it is non-positive, it is computed from ksize as +`sigma = 0.3*((ksize-1)*0.5 - 1) + 0.8`. +@param ktype Type of filter coefficients. It can be CV_32F or CV_64F . +@sa sepFilter2D, getDerivKernels, getStructuringElement, GaussianBlur + */ +CV_EXPORTS_W Mat getGaussianKernel( int ksize, double sigma, int ktype = CV_64F ); + +/** @brief Returns filter coefficients for computing spatial image derivatives. + +The function computes and returns the filter coefficients for spatial image derivatives. When +`ksize=FILTER_SCHARR`, the Scharr \f$3 \times 3\f$ kernels are generated (see #Scharr). Otherwise, Sobel +kernels are generated (see #Sobel). The filters are normally passed to #sepFilter2D or to + +@param kx Output matrix of row filter coefficients. It has the type ktype . +@param ky Output matrix of column filter coefficients. It has the type ktype . +@param dx Derivative order in respect of x. +@param dy Derivative order in respect of y. +@param ksize Aperture size. It can be FILTER_SCHARR, 1, 3, 5, or 7. +@param normalize Flag indicating whether to normalize (scale down) the filter coefficients or not. +Theoretically, the coefficients should have the denominator \f$=2^{ksize*2-dx-dy-2}\f$. If you are +going to filter floating-point images, you are likely to use the normalized kernels. But if you +compute derivatives of an 8-bit image, store the results in a 16-bit image, and wish to preserve +all the fractional bits, you may want to set normalize=false . +@param ktype Type of filter coefficients. It can be CV_32f or CV_64F . + */ +CV_EXPORTS_W void getDerivKernels( OutputArray kx, OutputArray ky, + int dx, int dy, int ksize, + bool normalize = false, int ktype = CV_32F ); + +/** @brief Returns Gabor filter coefficients. + +For more details about gabor filter equations and parameters, see: [Gabor +Filter](http://en.wikipedia.org/wiki/Gabor_filter). + +@param ksize Size of the filter returned. +@param sigma Standard deviation of the gaussian envelope. +@param theta Orientation of the normal to the parallel stripes of a Gabor function. +@param lambd Wavelength of the sinusoidal factor. +@param gamma Spatial aspect ratio. +@param psi Phase offset. +@param ktype Type of filter coefficients. It can be CV_32F or CV_64F . + */ +CV_EXPORTS_W Mat getGaborKernel( Size ksize, double sigma, double theta, double lambd, + double gamma, double psi = CV_PI*0.5, int ktype = CV_64F ); + +//! returns "magic" border value for erosion and dilation. It is automatically transformed to Scalar::all(-DBL_MAX) for dilation. +static inline Scalar morphologyDefaultBorderValue() { return Scalar::all(DBL_MAX); } + +/** @brief Returns a structuring element of the specified size and shape for morphological operations. + +The function constructs and returns the structuring element that can be further passed to #erode, +#dilate or #morphologyEx. But you can also construct an arbitrary binary mask yourself and use it as +the structuring element. + +@param shape Element shape that could be one of #MorphShapes +@param ksize Size of the structuring element. +@param anchor Anchor position within the element. The default value \f$(-1, -1)\f$ means that the +anchor is at the center. Note that only the shape of a cross-shaped element depends on the anchor +position. In other cases the anchor just regulates how much the result of the morphological +operation is shifted. + */ +CV_EXPORTS_W Mat getStructuringElement(int shape, Size ksize, Point anchor = Point(-1,-1)); + +/** @example samples/cpp/tutorial_code/ImgProc/Smoothing/Smoothing.cpp +Sample code for simple filters +![Sample screenshot](Smoothing_Tutorial_Result_Median_Filter.jpg) +Check @ref tutorial_gausian_median_blur_bilateral_filter "the corresponding tutorial" for more details + */ + +/** @brief Blurs an image using the median filter. + +The function smoothes an image using the median filter with the \f$\texttt{ksize} \times +\texttt{ksize}\f$ aperture. Each channel of a multi-channel image is processed independently. +In-place operation is supported. + +@note The median filter uses #BORDER_REPLICATE internally to cope with border pixels, see #BorderTypes + +@param src input 1-, 3-, or 4-channel image; when ksize is 3 or 5, the image depth should be +CV_8U, CV_16U, or CV_32F, for larger aperture sizes, it can only be CV_8U. +@param dst destination array of the same size and type as src. +@param ksize aperture linear size; it must be odd and greater than 1, for example: 3, 5, 7 ... +@sa bilateralFilter, blur, boxFilter, GaussianBlur + */ +CV_EXPORTS_W void medianBlur( InputArray src, OutputArray dst, int ksize ); + +/** @brief Blurs an image using a Gaussian filter. + +The function convolves the source image with the specified Gaussian kernel. In-place filtering is +supported. + +@param src input image; the image can have any number of channels, which are processed +independently, but the depth should be CV_8U, CV_16U, CV_16S, CV_32F or CV_64F. +@param dst output image of the same size and type as src. +@param ksize Gaussian kernel size. ksize.width and ksize.height can differ but they both must be +positive and odd. Or, they can be zero's and then they are computed from sigma. +@param sigmaX Gaussian kernel standard deviation in X direction. +@param sigmaY Gaussian kernel standard deviation in Y direction; if sigmaY is zero, it is set to be +equal to sigmaX, if both sigmas are zeros, they are computed from ksize.width and ksize.height, +respectively (see #getGaussianKernel for details); to fully control the result regardless of +possible future modifications of all this semantics, it is recommended to specify all of ksize, +sigmaX, and sigmaY. +@param borderType pixel extrapolation method, see #BorderTypes. #BORDER_WRAP is not supported. + +@sa sepFilter2D, filter2D, blur, boxFilter, bilateralFilter, medianBlur + */ +CV_EXPORTS_W void GaussianBlur( InputArray src, OutputArray dst, Size ksize, + double sigmaX, double sigmaY = 0, + int borderType = BORDER_DEFAULT ); + +/** @brief Applies the bilateral filter to an image. + +The function applies bilateral filtering to the input image, as described in +http://www.dai.ed.ac.uk/CVonline/LOCAL_COPIES/MANDUCHI1/Bilateral_Filtering.html +bilateralFilter can reduce unwanted noise very well while keeping edges fairly sharp. However, it is +very slow compared to most filters. + +_Sigma values_: For simplicity, you can set the 2 sigma values to be the same. If they are small (\< +10), the filter will not have much effect, whereas if they are large (\> 150), they will have a very +strong effect, making the image look "cartoonish". + +_Filter size_: Large filters (d \> 5) are very slow, so it is recommended to use d=5 for real-time +applications, and perhaps d=9 for offline applications that need heavy noise filtering. + +This filter does not work inplace. +@param src Source 8-bit or floating-point, 1-channel or 3-channel image. +@param dst Destination image of the same size and type as src . +@param d Diameter of each pixel neighborhood that is used during filtering. If it is non-positive, +it is computed from sigmaSpace. +@param sigmaColor Filter sigma in the color space. A larger value of the parameter means that +farther colors within the pixel neighborhood (see sigmaSpace) will be mixed together, resulting +in larger areas of semi-equal color. +@param sigmaSpace Filter sigma in the coordinate space. A larger value of the parameter means that +farther pixels will influence each other as long as their colors are close enough (see sigmaColor +). When d\>0, it specifies the neighborhood size regardless of sigmaSpace. Otherwise, d is +proportional to sigmaSpace. +@param borderType border mode used to extrapolate pixels outside of the image, see #BorderTypes + */ +CV_EXPORTS_W void bilateralFilter( InputArray src, OutputArray dst, int d, + double sigmaColor, double sigmaSpace, + int borderType = BORDER_DEFAULT ); + +/** @brief Blurs an image using the box filter. + +The function smooths an image using the kernel: + +\f[\texttt{K} = \alpha \begin{bmatrix} 1 & 1 & 1 & \cdots & 1 & 1 \\ 1 & 1 & 1 & \cdots & 1 & 1 \\ \hdotsfor{6} \\ 1 & 1 & 1 & \cdots & 1 & 1 \end{bmatrix}\f] + +where + +\f[\alpha = \begin{cases} \frac{1}{\texttt{ksize.width*ksize.height}} & \texttt{when } \texttt{normalize=true} \\1 & \texttt{otherwise}\end{cases}\f] + +Unnormalized box filter is useful for computing various integral characteristics over each pixel +neighborhood, such as covariance matrices of image derivatives (used in dense optical flow +algorithms, and so on). If you need to compute pixel sums over variable-size windows, use #integral. + +@param src input image. +@param dst output image of the same size and type as src. +@param ddepth the output image depth (-1 to use src.depth()). +@param ksize blurring kernel size. +@param anchor anchor point; default value Point(-1,-1) means that the anchor is at the kernel +center. +@param normalize flag, specifying whether the kernel is normalized by its area or not. +@param borderType border mode used to extrapolate pixels outside of the image, see #BorderTypes. #BORDER_WRAP is not supported. +@sa blur, bilateralFilter, GaussianBlur, medianBlur, integral + */ +CV_EXPORTS_W void boxFilter( InputArray src, OutputArray dst, int ddepth, + Size ksize, Point anchor = Point(-1,-1), + bool normalize = true, + int borderType = BORDER_DEFAULT ); + +/** @brief Calculates the normalized sum of squares of the pixel values overlapping the filter. + +For every pixel \f$ (x, y) \f$ in the source image, the function calculates the sum of squares of those neighboring +pixel values which overlap the filter placed over the pixel \f$ (x, y) \f$. + +The unnormalized square box filter can be useful in computing local image statistics such as the the local +variance and standard deviation around the neighborhood of a pixel. + +@param src input image +@param dst output image of the same size and type as _src +@param ddepth the output image depth (-1 to use src.depth()) +@param ksize kernel size +@param anchor kernel anchor point. The default value of Point(-1, -1) denotes that the anchor is at the kernel +center. +@param normalize flag, specifying whether the kernel is to be normalized by it's area or not. +@param borderType border mode used to extrapolate pixels outside of the image, see #BorderTypes. #BORDER_WRAP is not supported. +@sa boxFilter +*/ +CV_EXPORTS_W void sqrBoxFilter( InputArray src, OutputArray dst, int ddepth, + Size ksize, Point anchor = Point(-1, -1), + bool normalize = true, + int borderType = BORDER_DEFAULT ); + +/** @brief Blurs an image using the normalized box filter. + +The function smooths an image using the kernel: + +\f[\texttt{K} = \frac{1}{\texttt{ksize.width*ksize.height}} \begin{bmatrix} 1 & 1 & 1 & \cdots & 1 & 1 \\ 1 & 1 & 1 & \cdots & 1 & 1 \\ \hdotsfor{6} \\ 1 & 1 & 1 & \cdots & 1 & 1 \\ \end{bmatrix}\f] + +The call `blur(src, dst, ksize, anchor, borderType)` is equivalent to `boxFilter(src, dst, src.type(), ksize, +anchor, true, borderType)`. + +@param src input image; it can have any number of channels, which are processed independently, but +the depth should be CV_8U, CV_16U, CV_16S, CV_32F or CV_64F. +@param dst output image of the same size and type as src. +@param ksize blurring kernel size. +@param anchor anchor point; default value Point(-1,-1) means that the anchor is at the kernel +center. +@param borderType border mode used to extrapolate pixels outside of the image, see #BorderTypes. #BORDER_WRAP is not supported. +@sa boxFilter, bilateralFilter, GaussianBlur, medianBlur + */ +CV_EXPORTS_W void blur( InputArray src, OutputArray dst, + Size ksize, Point anchor = Point(-1,-1), + int borderType = BORDER_DEFAULT ); + +/** @brief Convolves an image with the kernel. + +The function applies an arbitrary linear filter to an image. In-place operation is supported. When +the aperture is partially outside the image, the function interpolates outlier pixel values +according to the specified border mode. + +The function does actually compute correlation, not the convolution: + +\f[\texttt{dst} (x,y) = \sum _{ \substack{0\leq x' < \texttt{kernel.cols}\\{0\leq y' < \texttt{kernel.rows}}}} \texttt{kernel} (x',y')* \texttt{src} (x+x'- \texttt{anchor.x} ,y+y'- \texttt{anchor.y} )\f] + +That is, the kernel is not mirrored around the anchor point. If you need a real convolution, flip +the kernel using #flip and set the new anchor to `(kernel.cols - anchor.x - 1, kernel.rows - +anchor.y - 1)`. + +The function uses the DFT-based algorithm in case of sufficiently large kernels (~`11 x 11` or +larger) and the direct algorithm for small kernels. + +@param src input image. +@param dst output image of the same size and the same number of channels as src. +@param ddepth desired depth of the destination image, see @ref filter_depths "combinations" +@param kernel convolution kernel (or rather a correlation kernel), a single-channel floating point +matrix; if you want to apply different kernels to different channels, split the image into +separate color planes using split and process them individually. +@param anchor anchor of the kernel that indicates the relative position of a filtered point within +the kernel; the anchor should lie within the kernel; default value (-1,-1) means that the anchor +is at the kernel center. +@param delta optional value added to the filtered pixels before storing them in dst. +@param borderType pixel extrapolation method, see #BorderTypes. #BORDER_WRAP is not supported. +@sa sepFilter2D, dft, matchTemplate + */ +CV_EXPORTS_W void filter2D( InputArray src, OutputArray dst, int ddepth, + InputArray kernel, Point anchor = Point(-1,-1), + double delta = 0, int borderType = BORDER_DEFAULT ); + +/** @brief Applies a separable linear filter to an image. + +The function applies a separable linear filter to the image. That is, first, every row of src is +filtered with the 1D kernel kernelX. Then, every column of the result is filtered with the 1D +kernel kernelY. The final result shifted by delta is stored in dst . + +@param src Source image. +@param dst Destination image of the same size and the same number of channels as src . +@param ddepth Destination image depth, see @ref filter_depths "combinations" +@param kernelX Coefficients for filtering each row. +@param kernelY Coefficients for filtering each column. +@param anchor Anchor position within the kernel. The default value \f$(-1,-1)\f$ means that the anchor +is at the kernel center. +@param delta Value added to the filtered results before storing them. +@param borderType Pixel extrapolation method, see #BorderTypes. #BORDER_WRAP is not supported. +@sa filter2D, Sobel, GaussianBlur, boxFilter, blur + */ +CV_EXPORTS_W void sepFilter2D( InputArray src, OutputArray dst, int ddepth, + InputArray kernelX, InputArray kernelY, + Point anchor = Point(-1,-1), + double delta = 0, int borderType = BORDER_DEFAULT ); + +/** @example samples/cpp/tutorial_code/ImgTrans/Sobel_Demo.cpp +Sample code using Sobel and/or Scharr OpenCV functions to make a simple Edge Detector +![Sample screenshot](Sobel_Derivatives_Tutorial_Result.jpg) +Check @ref tutorial_sobel_derivatives "the corresponding tutorial" for more details +*/ + +/** @brief Calculates the first, second, third, or mixed image derivatives using an extended Sobel operator. + +In all cases except one, the \f$\texttt{ksize} \times \texttt{ksize}\f$ separable kernel is used to +calculate the derivative. When \f$\texttt{ksize = 1}\f$, the \f$3 \times 1\f$ or \f$1 \times 3\f$ +kernel is used (that is, no Gaussian smoothing is done). `ksize = 1` can only be used for the first +or the second x- or y- derivatives. + +There is also the special value `ksize = #FILTER_SCHARR (-1)` that corresponds to the \f$3\times3\f$ Scharr +filter that may give more accurate results than the \f$3\times3\f$ Sobel. The Scharr aperture is + +\f[\vecthreethree{-3}{0}{3}{-10}{0}{10}{-3}{0}{3}\f] + +for the x-derivative, or transposed for the y-derivative. + +The function calculates an image derivative by convolving the image with the appropriate kernel: + +\f[\texttt{dst} = \frac{\partial^{xorder+yorder} \texttt{src}}{\partial x^{xorder} \partial y^{yorder}}\f] + +The Sobel operators combine Gaussian smoothing and differentiation, so the result is more or less +resistant to the noise. Most often, the function is called with ( xorder = 1, yorder = 0, ksize = 3) +or ( xorder = 0, yorder = 1, ksize = 3) to calculate the first x- or y- image derivative. The first +case corresponds to a kernel of: + +\f[\vecthreethree{-1}{0}{1}{-2}{0}{2}{-1}{0}{1}\f] + +The second case corresponds to a kernel of: + +\f[\vecthreethree{-1}{-2}{-1}{0}{0}{0}{1}{2}{1}\f] + +@param src input image. +@param dst output image of the same size and the same number of channels as src . +@param ddepth output image depth, see @ref filter_depths "combinations"; in the case of + 8-bit input images it will result in truncated derivatives. +@param dx order of the derivative x. +@param dy order of the derivative y. +@param ksize size of the extended Sobel kernel; it must be 1, 3, 5, or 7. +@param scale optional scale factor for the computed derivative values; by default, no scaling is +applied (see #getDerivKernels for details). +@param delta optional delta value that is added to the results prior to storing them in dst. +@param borderType pixel extrapolation method, see #BorderTypes. #BORDER_WRAP is not supported. +@sa Scharr, Laplacian, sepFilter2D, filter2D, GaussianBlur, cartToPolar + */ +CV_EXPORTS_W void Sobel( InputArray src, OutputArray dst, int ddepth, + int dx, int dy, int ksize = 3, + double scale = 1, double delta = 0, + int borderType = BORDER_DEFAULT ); + +/** @brief Calculates the first order image derivative in both x and y using a Sobel operator + +Equivalent to calling: + +@code +Sobel( src, dx, CV_16SC1, 1, 0, 3 ); +Sobel( src, dy, CV_16SC1, 0, 1, 3 ); +@endcode + +@param src input image. +@param dx output image with first-order derivative in x. +@param dy output image with first-order derivative in y. +@param ksize size of Sobel kernel. It must be 3. +@param borderType pixel extrapolation method, see #BorderTypes. + Only #BORDER_DEFAULT=#BORDER_REFLECT_101 and #BORDER_REPLICATE are supported. + +@sa Sobel + */ + +CV_EXPORTS_W void spatialGradient( InputArray src, OutputArray dx, + OutputArray dy, int ksize = 3, + int borderType = BORDER_DEFAULT ); + +/** @brief Calculates the first x- or y- image derivative using Scharr operator. + +The function computes the first x- or y- spatial image derivative using the Scharr operator. The +call + +\f[\texttt{Scharr(src, dst, ddepth, dx, dy, scale, delta, borderType)}\f] + +is equivalent to + +\f[\texttt{Sobel(src, dst, ddepth, dx, dy, FILTER_SCHARR, scale, delta, borderType)} .\f] + +@param src input image. +@param dst output image of the same size and the same number of channels as src. +@param ddepth output image depth, see @ref filter_depths "combinations" +@param dx order of the derivative x. +@param dy order of the derivative y. +@param scale optional scale factor for the computed derivative values; by default, no scaling is +applied (see #getDerivKernels for details). +@param delta optional delta value that is added to the results prior to storing them in dst. +@param borderType pixel extrapolation method, see #BorderTypes. #BORDER_WRAP is not supported. +@sa cartToPolar + */ +CV_EXPORTS_W void Scharr( InputArray src, OutputArray dst, int ddepth, + int dx, int dy, double scale = 1, double delta = 0, + int borderType = BORDER_DEFAULT ); + +/** @example samples/cpp/laplace.cpp +An example using Laplace transformations for edge detection +*/ + +/** @brief Calculates the Laplacian of an image. + +The function calculates the Laplacian of the source image by adding up the second x and y +derivatives calculated using the Sobel operator: + +\f[\texttt{dst} = \Delta \texttt{src} = \frac{\partial^2 \texttt{src}}{\partial x^2} + \frac{\partial^2 \texttt{src}}{\partial y^2}\f] + +This is done when `ksize > 1`. When `ksize == 1`, the Laplacian is computed by filtering the image +with the following \f$3 \times 3\f$ aperture: + +\f[\vecthreethree {0}{1}{0}{1}{-4}{1}{0}{1}{0}\f] + +@param src Source image. +@param dst Destination image of the same size and the same number of channels as src . +@param ddepth Desired depth of the destination image. +@param ksize Aperture size used to compute the second-derivative filters. See #getDerivKernels for +details. The size must be positive and odd. +@param scale Optional scale factor for the computed Laplacian values. By default, no scaling is +applied. See #getDerivKernels for details. +@param delta Optional delta value that is added to the results prior to storing them in dst . +@param borderType Pixel extrapolation method, see #BorderTypes. #BORDER_WRAP is not supported. +@sa Sobel, Scharr + */ +CV_EXPORTS_W void Laplacian( InputArray src, OutputArray dst, int ddepth, + int ksize = 1, double scale = 1, double delta = 0, + int borderType = BORDER_DEFAULT ); + +//! @} imgproc_filter + +//! @addtogroup imgproc_feature +//! @{ + +/** @example samples/cpp/edge.cpp +This program demonstrates usage of the Canny edge detector + +Check @ref tutorial_canny_detector "the corresponding tutorial" for more details +*/ + +/** @brief Finds edges in an image using the Canny algorithm @cite Canny86 . + +The function finds edges in the input image and marks them in the output map edges using the +Canny algorithm. The smallest value between threshold1 and threshold2 is used for edge linking. The +largest value is used to find initial segments of strong edges. See + + +@param image 8-bit input image. +@param edges output edge map; single channels 8-bit image, which has the same size as image . +@param threshold1 first threshold for the hysteresis procedure. +@param threshold2 second threshold for the hysteresis procedure. +@param apertureSize aperture size for the Sobel operator. +@param L2gradient a flag, indicating whether a more accurate \f$L_2\f$ norm +\f$=\sqrt{(dI/dx)^2 + (dI/dy)^2}\f$ should be used to calculate the image gradient magnitude ( +L2gradient=true ), or whether the default \f$L_1\f$ norm \f$=|dI/dx|+|dI/dy|\f$ is enough ( +L2gradient=false ). + */ +CV_EXPORTS_W void Canny( InputArray image, OutputArray edges, + double threshold1, double threshold2, + int apertureSize = 3, bool L2gradient = false ); + +/** \overload + +Finds edges in an image using the Canny algorithm with custom image gradient. + +@param dx 16-bit x derivative of input image (CV_16SC1 or CV_16SC3). +@param dy 16-bit y derivative of input image (same type as dx). +@param edges output edge map; single channels 8-bit image, which has the same size as image . +@param threshold1 first threshold for the hysteresis procedure. +@param threshold2 second threshold for the hysteresis procedure. +@param L2gradient a flag, indicating whether a more accurate \f$L_2\f$ norm +\f$=\sqrt{(dI/dx)^2 + (dI/dy)^2}\f$ should be used to calculate the image gradient magnitude ( +L2gradient=true ), or whether the default \f$L_1\f$ norm \f$=|dI/dx|+|dI/dy|\f$ is enough ( +L2gradient=false ). + */ +CV_EXPORTS_W void Canny( InputArray dx, InputArray dy, + OutputArray edges, + double threshold1, double threshold2, + bool L2gradient = false ); + +/** @brief Calculates the minimal eigenvalue of gradient matrices for corner detection. + +The function is similar to cornerEigenValsAndVecs but it calculates and stores only the minimal +eigenvalue of the covariance matrix of derivatives, that is, \f$\min(\lambda_1, \lambda_2)\f$ in terms +of the formulae in the cornerEigenValsAndVecs description. + +@param src Input single-channel 8-bit or floating-point image. +@param dst Image to store the minimal eigenvalues. It has the type CV_32FC1 and the same size as +src . +@param blockSize Neighborhood size (see the details on #cornerEigenValsAndVecs ). +@param ksize Aperture parameter for the Sobel operator. +@param borderType Pixel extrapolation method. See #BorderTypes. #BORDER_WRAP is not supported. + */ +CV_EXPORTS_W void cornerMinEigenVal( InputArray src, OutputArray dst, + int blockSize, int ksize = 3, + int borderType = BORDER_DEFAULT ); + +/** @brief Harris corner detector. + +The function runs the Harris corner detector on the image. Similarly to cornerMinEigenVal and +cornerEigenValsAndVecs , for each pixel \f$(x, y)\f$ it calculates a \f$2\times2\f$ gradient covariance +matrix \f$M^{(x,y)}\f$ over a \f$\texttt{blockSize} \times \texttt{blockSize}\f$ neighborhood. Then, it +computes the following characteristic: + +\f[\texttt{dst} (x,y) = \mathrm{det} M^{(x,y)} - k \cdot \left ( \mathrm{tr} M^{(x,y)} \right )^2\f] + +Corners in the image can be found as the local maxima of this response map. + +@param src Input single-channel 8-bit or floating-point image. +@param dst Image to store the Harris detector responses. It has the type CV_32FC1 and the same +size as src . +@param blockSize Neighborhood size (see the details on #cornerEigenValsAndVecs ). +@param ksize Aperture parameter for the Sobel operator. +@param k Harris detector free parameter. See the formula above. +@param borderType Pixel extrapolation method. See #BorderTypes. #BORDER_WRAP is not supported. + */ +CV_EXPORTS_W void cornerHarris( InputArray src, OutputArray dst, int blockSize, + int ksize, double k, + int borderType = BORDER_DEFAULT ); + +/** @brief Calculates eigenvalues and eigenvectors of image blocks for corner detection. + +For every pixel \f$p\f$ , the function cornerEigenValsAndVecs considers a blockSize \f$\times\f$ blockSize +neighborhood \f$S(p)\f$ . It calculates the covariation matrix of derivatives over the neighborhood as: + +\f[M = \begin{bmatrix} \sum _{S(p)}(dI/dx)^2 & \sum _{S(p)}dI/dx dI/dy \\ \sum _{S(p)}dI/dx dI/dy & \sum _{S(p)}(dI/dy)^2 \end{bmatrix}\f] + +where the derivatives are computed using the Sobel operator. + +After that, it finds eigenvectors and eigenvalues of \f$M\f$ and stores them in the destination image as +\f$(\lambda_1, \lambda_2, x_1, y_1, x_2, y_2)\f$ where + +- \f$\lambda_1, \lambda_2\f$ are the non-sorted eigenvalues of \f$M\f$ +- \f$x_1, y_1\f$ are the eigenvectors corresponding to \f$\lambda_1\f$ +- \f$x_2, y_2\f$ are the eigenvectors corresponding to \f$\lambda_2\f$ + +The output of the function can be used for robust edge or corner detection. + +@param src Input single-channel 8-bit or floating-point image. +@param dst Image to store the results. It has the same size as src and the type CV_32FC(6) . +@param blockSize Neighborhood size (see details below). +@param ksize Aperture parameter for the Sobel operator. +@param borderType Pixel extrapolation method. See #BorderTypes. #BORDER_WRAP is not supported. + +@sa cornerMinEigenVal, cornerHarris, preCornerDetect + */ +CV_EXPORTS_W void cornerEigenValsAndVecs( InputArray src, OutputArray dst, + int blockSize, int ksize, + int borderType = BORDER_DEFAULT ); + +/** @brief Calculates a feature map for corner detection. + +The function calculates the complex spatial derivative-based function of the source image + +\f[\texttt{dst} = (D_x \texttt{src} )^2 \cdot D_{yy} \texttt{src} + (D_y \texttt{src} )^2 \cdot D_{xx} \texttt{src} - 2 D_x \texttt{src} \cdot D_y \texttt{src} \cdot D_{xy} \texttt{src}\f] + +where \f$D_x\f$,\f$D_y\f$ are the first image derivatives, \f$D_{xx}\f$,\f$D_{yy}\f$ are the second image +derivatives, and \f$D_{xy}\f$ is the mixed derivative. + +The corners can be found as local maximums of the functions, as shown below: +@code + Mat corners, dilated_corners; + preCornerDetect(image, corners, 3); + // dilation with 3x3 rectangular structuring element + dilate(corners, dilated_corners, Mat(), 1); + Mat corner_mask = corners == dilated_corners; +@endcode + +@param src Source single-channel 8-bit of floating-point image. +@param dst Output image that has the type CV_32F and the same size as src . +@param ksize %Aperture size of the Sobel . +@param borderType Pixel extrapolation method. See #BorderTypes. #BORDER_WRAP is not supported. + */ +CV_EXPORTS_W void preCornerDetect( InputArray src, OutputArray dst, int ksize, + int borderType = BORDER_DEFAULT ); + +/** @brief Refines the corner locations. + +The function iterates to find the sub-pixel accurate location of corners or radial saddle +points as described in @cite forstner1987fast, and as shown on the figure below. + +![image](pics/cornersubpix.png) + +Sub-pixel accurate corner locator is based on the observation that every vector from the center \f$q\f$ +to a point \f$p\f$ located within a neighborhood of \f$q\f$ is orthogonal to the image gradient at \f$p\f$ +subject to image and measurement noise. Consider the expression: + +\f[\epsilon _i = {DI_{p_i}}^T \cdot (q - p_i)\f] + +where \f${DI_{p_i}}\f$ is an image gradient at one of the points \f$p_i\f$ in a neighborhood of \f$q\f$ . The +value of \f$q\f$ is to be found so that \f$\epsilon_i\f$ is minimized. A system of equations may be set up +with \f$\epsilon_i\f$ set to zero: + +\f[\sum _i(DI_{p_i} \cdot {DI_{p_i}}^T) \cdot q - \sum _i(DI_{p_i} \cdot {DI_{p_i}}^T \cdot p_i)\f] + +where the gradients are summed within a neighborhood ("search window") of \f$q\f$ . Calling the first +gradient term \f$G\f$ and the second gradient term \f$b\f$ gives: + +\f[q = G^{-1} \cdot b\f] + +The algorithm sets the center of the neighborhood window at this new center \f$q\f$ and then iterates +until the center stays within a set threshold. + +@param image Input single-channel, 8-bit or float image. +@param corners Initial coordinates of the input corners and refined coordinates provided for +output. +@param winSize Half of the side length of the search window. For example, if winSize=Size(5,5) , +then a \f$(5*2+1) \times (5*2+1) = 11 \times 11\f$ search window is used. +@param zeroZone Half of the size of the dead region in the middle of the search zone over which +the summation in the formula below is not done. It is used sometimes to avoid possible +singularities of the autocorrelation matrix. The value of (-1,-1) indicates that there is no such +a size. +@param criteria Criteria for termination of the iterative process of corner refinement. That is, +the process of corner position refinement stops either after criteria.maxCount iterations or when +the corner position moves by less than criteria.epsilon on some iteration. + */ +CV_EXPORTS_W void cornerSubPix( InputArray image, InputOutputArray corners, + Size winSize, Size zeroZone, + TermCriteria criteria ); + +/** @brief Determines strong corners on an image. + +The function finds the most prominent corners in the image or in the specified image region, as +described in @cite Shi94 + +- Function calculates the corner quality measure at every source image pixel using the + #cornerMinEigenVal or #cornerHarris . +- Function performs a non-maximum suppression (the local maximums in *3 x 3* neighborhood are + retained). +- The corners with the minimal eigenvalue less than + \f$\texttt{qualityLevel} \cdot \max_{x,y} qualityMeasureMap(x,y)\f$ are rejected. +- The remaining corners are sorted by the quality measure in the descending order. +- Function throws away each corner for which there is a stronger corner at a distance less than + maxDistance. + +The function can be used to initialize a point-based tracker of an object. + +@note If the function is called with different values A and B of the parameter qualityLevel , and +A \> B, the vector of returned corners with qualityLevel=A will be the prefix of the output vector +with qualityLevel=B . + +@param image Input 8-bit or floating-point 32-bit, single-channel image. +@param corners Output vector of detected corners. +@param maxCorners Maximum number of corners to return. If there are more corners than are found, +the strongest of them is returned. `maxCorners <= 0` implies that no limit on the maximum is set +and all detected corners are returned. +@param qualityLevel Parameter characterizing the minimal accepted quality of image corners. The +parameter value is multiplied by the best corner quality measure, which is the minimal eigenvalue +(see #cornerMinEigenVal ) or the Harris function response (see #cornerHarris ). The corners with the +quality measure less than the product are rejected. For example, if the best corner has the +quality measure = 1500, and the qualityLevel=0.01 , then all the corners with the quality measure +less than 15 are rejected. +@param minDistance Minimum possible Euclidean distance between the returned corners. +@param mask Optional region of interest. If the image is not empty (it needs to have the type +CV_8UC1 and the same size as image ), it specifies the region in which the corners are detected. +@param blockSize Size of an average block for computing a derivative covariation matrix over each +pixel neighborhood. See cornerEigenValsAndVecs . +@param useHarrisDetector Parameter indicating whether to use a Harris detector (see #cornerHarris) +or #cornerMinEigenVal. +@param k Free parameter of the Harris detector. + +@sa cornerMinEigenVal, cornerHarris, calcOpticalFlowPyrLK, estimateRigidTransform, + */ + +CV_EXPORTS_W void goodFeaturesToTrack( InputArray image, OutputArray corners, + int maxCorners, double qualityLevel, double minDistance, + InputArray mask = noArray(), int blockSize = 3, + bool useHarrisDetector = false, double k = 0.04 ); + +CV_EXPORTS_W void goodFeaturesToTrack( InputArray image, OutputArray corners, + int maxCorners, double qualityLevel, double minDistance, + InputArray mask, int blockSize, + int gradientSize, bool useHarrisDetector = false, + double k = 0.04 ); +/** @example samples/cpp/tutorial_code/ImgTrans/houghlines.cpp +An example using the Hough line detector +![Sample input image](Hough_Lines_Tutorial_Original_Image.jpg) ![Output image](Hough_Lines_Tutorial_Result.jpg) +*/ + +/** @brief Finds lines in a binary image using the standard Hough transform. + +The function implements the standard or standard multi-scale Hough transform algorithm for line +detection. See for a good explanation of Hough +transform. + +@param image 8-bit, single-channel binary source image. The image may be modified by the function. +@param lines Output vector of lines. Each line is represented by a 2 or 3 element vector +\f$(\rho, \theta)\f$ or \f$(\rho, \theta, \textrm{votes})\f$ . \f$\rho\f$ is the distance from the coordinate origin \f$(0,0)\f$ (top-left corner of +the image). \f$\theta\f$ is the line rotation angle in radians ( +\f$0 \sim \textrm{vertical line}, \pi/2 \sim \textrm{horizontal line}\f$ ). +\f$\textrm{votes}\f$ is the value of accumulator. +@param rho Distance resolution of the accumulator in pixels. +@param theta Angle resolution of the accumulator in radians. +@param threshold Accumulator threshold parameter. Only those lines are returned that get enough +votes ( \f$>\texttt{threshold}\f$ ). +@param srn For the multi-scale Hough transform, it is a divisor for the distance resolution rho . +The coarse accumulator distance resolution is rho and the accurate accumulator resolution is +rho/srn . If both srn=0 and stn=0 , the classical Hough transform is used. Otherwise, both these +parameters should be positive. +@param stn For the multi-scale Hough transform, it is a divisor for the distance resolution theta. +@param min_theta For standard and multi-scale Hough transform, minimum angle to check for lines. +Must fall between 0 and max_theta. +@param max_theta For standard and multi-scale Hough transform, maximum angle to check for lines. +Must fall between min_theta and CV_PI. + */ +CV_EXPORTS_W void HoughLines( InputArray image, OutputArray lines, + double rho, double theta, int threshold, + double srn = 0, double stn = 0, + double min_theta = 0, double max_theta = CV_PI ); + +/** @brief Finds line segments in a binary image using the probabilistic Hough transform. + +The function implements the probabilistic Hough transform algorithm for line detection, described +in @cite Matas00 + +See the line detection example below: +@include snippets/imgproc_HoughLinesP.cpp +This is a sample picture the function parameters have been tuned for: + +![image](pics/building.jpg) + +And this is the output of the above program in case of the probabilistic Hough transform: + +![image](pics/houghp.png) + +@param image 8-bit, single-channel binary source image. The image may be modified by the function. +@param lines Output vector of lines. Each line is represented by a 4-element vector +\f$(x_1, y_1, x_2, y_2)\f$ , where \f$(x_1,y_1)\f$ and \f$(x_2, y_2)\f$ are the ending points of each detected +line segment. +@param rho Distance resolution of the accumulator in pixels. +@param theta Angle resolution of the accumulator in radians. +@param threshold Accumulator threshold parameter. Only those lines are returned that get enough +votes ( \f$>\texttt{threshold}\f$ ). +@param minLineLength Minimum line length. Line segments shorter than that are rejected. +@param maxLineGap Maximum allowed gap between points on the same line to link them. + +@sa LineSegmentDetector + */ +CV_EXPORTS_W void HoughLinesP( InputArray image, OutputArray lines, + double rho, double theta, int threshold, + double minLineLength = 0, double maxLineGap = 0 ); + +/** @brief Finds lines in a set of points using the standard Hough transform. + +The function finds lines in a set of points using a modification of the Hough transform. +@include snippets/imgproc_HoughLinesPointSet.cpp +@param _point Input vector of points. Each vector must be encoded as a Point vector \f$(x,y)\f$. Type must be CV_32FC2 or CV_32SC2. +@param _lines Output vector of found lines. Each vector is encoded as a vector \f$(votes, rho, theta)\f$. +The larger the value of 'votes', the higher the reliability of the Hough line. +@param lines_max Max count of hough lines. +@param threshold Accumulator threshold parameter. Only those lines are returned that get enough +votes ( \f$>\texttt{threshold}\f$ ) +@param min_rho Minimum Distance value of the accumulator in pixels. +@param max_rho Maximum Distance value of the accumulator in pixels. +@param rho_step Distance resolution of the accumulator in pixels. +@param min_theta Minimum angle value of the accumulator in radians. +@param max_theta Maximum angle value of the accumulator in radians. +@param theta_step Angle resolution of the accumulator in radians. + */ +CV_EXPORTS_W void HoughLinesPointSet( InputArray _point, OutputArray _lines, int lines_max, int threshold, + double min_rho, double max_rho, double rho_step, + double min_theta, double max_theta, double theta_step ); + +/** @example samples/cpp/tutorial_code/ImgTrans/houghcircles.cpp +An example using the Hough circle detector +*/ + +/** @brief Finds circles in a grayscale image using the Hough transform. + +The function finds circles in a grayscale image using a modification of the Hough transform. + +Example: : +@include snippets/imgproc_HoughLinesCircles.cpp + +@note Usually the function detects the centers of circles well. However, it may fail to find correct +radii. You can assist to the function by specifying the radius range ( minRadius and maxRadius ) if +you know it. Or, in the case of #HOUGH_GRADIENT method you may set maxRadius to a negative number +to return centers only without radius search, and find the correct radius using an additional procedure. + +It also helps to smooth image a bit unless it's already soft. For example, +GaussianBlur() with 7x7 kernel and 1.5x1.5 sigma or similar blurring may help. + +@param image 8-bit, single-channel, grayscale input image. +@param circles Output vector of found circles. Each vector is encoded as 3 or 4 element +floating-point vector \f$(x, y, radius)\f$ or \f$(x, y, radius, votes)\f$ . +@param method Detection method, see #HoughModes. The available methods are #HOUGH_GRADIENT and #HOUGH_GRADIENT_ALT. +@param dp Inverse ratio of the accumulator resolution to the image resolution. For example, if +dp=1 , the accumulator has the same resolution as the input image. If dp=2 , the accumulator has +half as big width and height. For #HOUGH_GRADIENT_ALT the recommended value is dp=1.5, +unless some small very circles need to be detected. +@param minDist Minimum distance between the centers of the detected circles. If the parameter is +too small, multiple neighbor circles may be falsely detected in addition to a true one. If it is +too large, some circles may be missed. +@param param1 First method-specific parameter. In case of #HOUGH_GRADIENT and #HOUGH_GRADIENT_ALT, +it is the higher threshold of the two passed to the Canny edge detector (the lower one is twice smaller). +Note that #HOUGH_GRADIENT_ALT uses #Scharr algorithm to compute image derivatives, so the threshold value +shough normally be higher, such as 300 or normally exposed and contrasty images. +@param param2 Second method-specific parameter. In case of #HOUGH_GRADIENT, it is the +accumulator threshold for the circle centers at the detection stage. The smaller it is, the more +false circles may be detected. Circles, corresponding to the larger accumulator values, will be +returned first. In the case of #HOUGH_GRADIENT_ALT algorithm, this is the circle "perfectness" measure. +The closer it to 1, the better shaped circles algorithm selects. In most cases 0.9 should be fine. +If you want get better detection of small circles, you may decrease it to 0.85, 0.8 or even less. +But then also try to limit the search range [minRadius, maxRadius] to avoid many false circles. +@param minRadius Minimum circle radius. +@param maxRadius Maximum circle radius. If <= 0, uses the maximum image dimension. If < 0, #HOUGH_GRADIENT returns +centers without finding the radius. #HOUGH_GRADIENT_ALT always computes circle radiuses. + +@sa fitEllipse, minEnclosingCircle + */ +CV_EXPORTS_W void HoughCircles( InputArray image, OutputArray circles, + int method, double dp, double minDist, + double param1 = 100, double param2 = 100, + int minRadius = 0, int maxRadius = 0 ); + +//! @} imgproc_feature + +//! @addtogroup imgproc_filter +//! @{ + +/** @example samples/cpp/tutorial_code/ImgProc/Morphology_2.cpp +Advanced morphology Transformations sample code +![Sample screenshot](Morphology_2_Tutorial_Result.jpg) +Check @ref tutorial_opening_closing_hats "the corresponding tutorial" for more details +*/ + +/** @brief Erodes an image by using a specific structuring element. + +The function erodes the source image using the specified structuring element that determines the +shape of a pixel neighborhood over which the minimum is taken: + +\f[\texttt{dst} (x,y) = \min _{(x',y'): \, \texttt{element} (x',y') \ne0 } \texttt{src} (x+x',y+y')\f] + +The function supports the in-place mode. Erosion can be applied several ( iterations ) times. In +case of multi-channel images, each channel is processed independently. + +@param src input image; the number of channels can be arbitrary, but the depth should be one of +CV_8U, CV_16U, CV_16S, CV_32F or CV_64F. +@param dst output image of the same size and type as src. +@param kernel structuring element used for erosion; if `element=Mat()`, a `3 x 3` rectangular +structuring element is used. Kernel can be created using #getStructuringElement. +@param anchor position of the anchor within the element; default value (-1, -1) means that the +anchor is at the element center. +@param iterations number of times erosion is applied. +@param borderType pixel extrapolation method, see #BorderTypes. #BORDER_WRAP is not supported. +@param borderValue border value in case of a constant border +@sa dilate, morphologyEx, getStructuringElement + */ +CV_EXPORTS_W void erode( InputArray src, OutputArray dst, InputArray kernel, + Point anchor = Point(-1,-1), int iterations = 1, + int borderType = BORDER_CONSTANT, + const Scalar& borderValue = morphologyDefaultBorderValue() ); + +/** @example samples/cpp/tutorial_code/ImgProc/Morphology_1.cpp +Erosion and Dilation sample code +![Sample Screenshot-Erosion](Morphology_1_Tutorial_Erosion_Result.jpg)![Sample Screenshot-Dilation](Morphology_1_Tutorial_Dilation_Result.jpg) +Check @ref tutorial_erosion_dilatation "the corresponding tutorial" for more details +*/ + +/** @brief Dilates an image by using a specific structuring element. + +The function dilates the source image using the specified structuring element that determines the +shape of a pixel neighborhood over which the maximum is taken: +\f[\texttt{dst} (x,y) = \max _{(x',y'): \, \texttt{element} (x',y') \ne0 } \texttt{src} (x+x',y+y')\f] + +The function supports the in-place mode. Dilation can be applied several ( iterations ) times. In +case of multi-channel images, each channel is processed independently. + +@param src input image; the number of channels can be arbitrary, but the depth should be one of +CV_8U, CV_16U, CV_16S, CV_32F or CV_64F. +@param dst output image of the same size and type as src. +@param kernel structuring element used for dilation; if elemenat=Mat(), a 3 x 3 rectangular +structuring element is used. Kernel can be created using #getStructuringElement +@param anchor position of the anchor within the element; default value (-1, -1) means that the +anchor is at the element center. +@param iterations number of times dilation is applied. +@param borderType pixel extrapolation method, see #BorderTypes. #BORDER_WRAP is not suported. +@param borderValue border value in case of a constant border +@sa erode, morphologyEx, getStructuringElement + */ +CV_EXPORTS_W void dilate( InputArray src, OutputArray dst, InputArray kernel, + Point anchor = Point(-1,-1), int iterations = 1, + int borderType = BORDER_CONSTANT, + const Scalar& borderValue = morphologyDefaultBorderValue() ); + +/** @brief Performs advanced morphological transformations. + +The function cv::morphologyEx can perform advanced morphological transformations using an erosion and dilation as +basic operations. + +Any of the operations can be done in-place. In case of multi-channel images, each channel is +processed independently. + +@param src Source image. The number of channels can be arbitrary. The depth should be one of +CV_8U, CV_16U, CV_16S, CV_32F or CV_64F. +@param dst Destination image of the same size and type as source image. +@param op Type of a morphological operation, see #MorphTypes +@param kernel Structuring element. It can be created using #getStructuringElement. +@param anchor Anchor position with the kernel. Negative values mean that the anchor is at the +kernel center. +@param iterations Number of times erosion and dilation are applied. +@param borderType Pixel extrapolation method, see #BorderTypes. #BORDER_WRAP is not supported. +@param borderValue Border value in case of a constant border. The default value has a special +meaning. +@sa dilate, erode, getStructuringElement +@note The number of iterations is the number of times erosion or dilatation operation will be applied. +For instance, an opening operation (#MORPH_OPEN) with two iterations is equivalent to apply +successively: erode -> erode -> dilate -> dilate (and not erode -> dilate -> erode -> dilate). + */ +CV_EXPORTS_W void morphologyEx( InputArray src, OutputArray dst, + int op, InputArray kernel, + Point anchor = Point(-1,-1), int iterations = 1, + int borderType = BORDER_CONSTANT, + const Scalar& borderValue = morphologyDefaultBorderValue() ); + +//! @} imgproc_filter + +//! @addtogroup imgproc_transform +//! @{ + +/** @brief Resizes an image. + +The function resize resizes the image src down to or up to the specified size. Note that the +initial dst type or size are not taken into account. Instead, the size and type are derived from +the `src`,`dsize`,`fx`, and `fy`. If you want to resize src so that it fits the pre-created dst, +you may call the function as follows: +@code + // explicitly specify dsize=dst.size(); fx and fy will be computed from that. + resize(src, dst, dst.size(), 0, 0, interpolation); +@endcode +If you want to decimate the image by factor of 2 in each direction, you can call the function this +way: +@code + // specify fx and fy and let the function compute the destination image size. + resize(src, dst, Size(), 0.5, 0.5, interpolation); +@endcode +To shrink an image, it will generally look best with #INTER_AREA interpolation, whereas to +enlarge an image, it will generally look best with c#INTER_CUBIC (slow) or #INTER_LINEAR +(faster but still looks OK). + +@param src input image. +@param dst output image; it has the size dsize (when it is non-zero) or the size computed from +src.size(), fx, and fy; the type of dst is the same as of src. +@param dsize output image size; if it equals zero, it is computed as: + \f[\texttt{dsize = Size(round(fx*src.cols), round(fy*src.rows))}\f] + Either dsize or both fx and fy must be non-zero. +@param fx scale factor along the horizontal axis; when it equals 0, it is computed as +\f[\texttt{(double)dsize.width/src.cols}\f] +@param fy scale factor along the vertical axis; when it equals 0, it is computed as +\f[\texttt{(double)dsize.height/src.rows}\f] +@param interpolation interpolation method, see #InterpolationFlags + +@sa warpAffine, warpPerspective, remap + */ +CV_EXPORTS_W void resize( InputArray src, OutputArray dst, + Size dsize, double fx = 0, double fy = 0, + int interpolation = INTER_LINEAR ); + +/** @brief Applies an affine transformation to an image. + +The function warpAffine transforms the source image using the specified matrix: + +\f[\texttt{dst} (x,y) = \texttt{src} ( \texttt{M} _{11} x + \texttt{M} _{12} y + \texttt{M} _{13}, \texttt{M} _{21} x + \texttt{M} _{22} y + \texttt{M} _{23})\f] + +when the flag #WARP_INVERSE_MAP is set. Otherwise, the transformation is first inverted +with #invertAffineTransform and then put in the formula above instead of M. The function cannot +operate in-place. + +@param src input image. +@param dst output image that has the size dsize and the same type as src . +@param M \f$2\times 3\f$ transformation matrix. +@param dsize size of the output image. +@param flags combination of interpolation methods (see #InterpolationFlags) and the optional +flag #WARP_INVERSE_MAP that means that M is the inverse transformation ( +\f$\texttt{dst}\rightarrow\texttt{src}\f$ ). +@param borderMode pixel extrapolation method (see #BorderTypes); when +borderMode=#BORDER_TRANSPARENT, it means that the pixels in the destination image corresponding to +the "outliers" in the source image are not modified by the function. +@param borderValue value used in case of a constant border; by default, it is 0. + +@sa warpPerspective, resize, remap, getRectSubPix, transform + */ +CV_EXPORTS_W void warpAffine( InputArray src, OutputArray dst, + InputArray M, Size dsize, + int flags = INTER_LINEAR, + int borderMode = BORDER_CONSTANT, + const Scalar& borderValue = Scalar()); + +/** @example samples/cpp/warpPerspective_demo.cpp +An example program shows using cv::getPerspectiveTransform and cv::warpPerspective for image warping +*/ + +/** @brief Applies a perspective transformation to an image. + +The function warpPerspective transforms the source image using the specified matrix: + +\f[\texttt{dst} (x,y) = \texttt{src} \left ( \frac{M_{11} x + M_{12} y + M_{13}}{M_{31} x + M_{32} y + M_{33}} , + \frac{M_{21} x + M_{22} y + M_{23}}{M_{31} x + M_{32} y + M_{33}} \right )\f] + +when the flag #WARP_INVERSE_MAP is set. Otherwise, the transformation is first inverted with invert +and then put in the formula above instead of M. The function cannot operate in-place. + +@param src input image. +@param dst output image that has the size dsize and the same type as src . +@param M \f$3\times 3\f$ transformation matrix. +@param dsize size of the output image. +@param flags combination of interpolation methods (#INTER_LINEAR or #INTER_NEAREST) and the +optional flag #WARP_INVERSE_MAP, that sets M as the inverse transformation ( +\f$\texttt{dst}\rightarrow\texttt{src}\f$ ). +@param borderMode pixel extrapolation method (#BORDER_CONSTANT or #BORDER_REPLICATE). +@param borderValue value used in case of a constant border; by default, it equals 0. + +@sa warpAffine, resize, remap, getRectSubPix, perspectiveTransform + */ +CV_EXPORTS_W void warpPerspective( InputArray src, OutputArray dst, + InputArray M, Size dsize, + int flags = INTER_LINEAR, + int borderMode = BORDER_CONSTANT, + const Scalar& borderValue = Scalar()); + +/** @brief Applies a generic geometrical transformation to an image. + +The function remap transforms the source image using the specified map: + +\f[\texttt{dst} (x,y) = \texttt{src} (map_x(x,y),map_y(x,y))\f] + +where values of pixels with non-integer coordinates are computed using one of available +interpolation methods. \f$map_x\f$ and \f$map_y\f$ can be encoded as separate floating-point maps +in \f$map_1\f$ and \f$map_2\f$ respectively, or interleaved floating-point maps of \f$(x,y)\f$ in +\f$map_1\f$, or fixed-point maps created by using convertMaps. The reason you might want to +convert from floating to fixed-point representations of a map is that they can yield much faster +(\~2x) remapping operations. In the converted case, \f$map_1\f$ contains pairs (cvFloor(x), +cvFloor(y)) and \f$map_2\f$ contains indices in a table of interpolation coefficients. + +This function cannot operate in-place. + +@param src Source image. +@param dst Destination image. It has the same size as map1 and the same type as src . +@param map1 The first map of either (x,y) points or just x values having the type CV_16SC2 , +CV_32FC1, or CV_32FC2. See convertMaps for details on converting a floating point +representation to fixed-point for speed. +@param map2 The second map of y values having the type CV_16UC1, CV_32FC1, or none (empty map +if map1 is (x,y) points), respectively. +@param interpolation Interpolation method (see #InterpolationFlags). The methods #INTER_AREA +and #INTER_LINEAR_EXACT are not supported by this function. +@param borderMode Pixel extrapolation method (see #BorderTypes). When +borderMode=#BORDER_TRANSPARENT, it means that the pixels in the destination image that +corresponds to the "outliers" in the source image are not modified by the function. +@param borderValue Value used in case of a constant border. By default, it is 0. +@note +Due to current implementation limitations the size of an input and output images should be less than 32767x32767. + */ +CV_EXPORTS_W void remap( InputArray src, OutputArray dst, + InputArray map1, InputArray map2, + int interpolation, int borderMode = BORDER_CONSTANT, + const Scalar& borderValue = Scalar()); + +/** @brief Converts image transformation maps from one representation to another. + +The function converts a pair of maps for remap from one representation to another. The following +options ( (map1.type(), map2.type()) \f$\rightarrow\f$ (dstmap1.type(), dstmap2.type()) ) are +supported: + +- \f$\texttt{(CV_32FC1, CV_32FC1)} \rightarrow \texttt{(CV_16SC2, CV_16UC1)}\f$. This is the +most frequently used conversion operation, in which the original floating-point maps (see remap ) +are converted to a more compact and much faster fixed-point representation. The first output array +contains the rounded coordinates and the second array (created only when nninterpolation=false ) +contains indices in the interpolation tables. + +- \f$\texttt{(CV_32FC2)} \rightarrow \texttt{(CV_16SC2, CV_16UC1)}\f$. The same as above but +the original maps are stored in one 2-channel matrix. + +- Reverse conversion. Obviously, the reconstructed floating-point maps will not be exactly the same +as the originals. + +@param map1 The first input map of type CV_16SC2, CV_32FC1, or CV_32FC2 . +@param map2 The second input map of type CV_16UC1, CV_32FC1, or none (empty matrix), +respectively. +@param dstmap1 The first output map that has the type dstmap1type and the same size as src . +@param dstmap2 The second output map. +@param dstmap1type Type of the first output map that should be CV_16SC2, CV_32FC1, or +CV_32FC2 . +@param nninterpolation Flag indicating whether the fixed-point maps are used for the +nearest-neighbor or for a more complex interpolation. + +@sa remap, undistort, initUndistortRectifyMap + */ +CV_EXPORTS_W void convertMaps( InputArray map1, InputArray map2, + OutputArray dstmap1, OutputArray dstmap2, + int dstmap1type, bool nninterpolation = false ); + +/** @brief Calculates an affine matrix of 2D rotation. + +The function calculates the following matrix: + +\f[\begin{bmatrix} \alpha & \beta & (1- \alpha ) \cdot \texttt{center.x} - \beta \cdot \texttt{center.y} \\ - \beta & \alpha & \beta \cdot \texttt{center.x} + (1- \alpha ) \cdot \texttt{center.y} \end{bmatrix}\f] + +where + +\f[\begin{array}{l} \alpha = \texttt{scale} \cdot \cos \texttt{angle} , \\ \beta = \texttt{scale} \cdot \sin \texttt{angle} \end{array}\f] + +The transformation maps the rotation center to itself. If this is not the target, adjust the shift. + +@param center Center of the rotation in the source image. +@param angle Rotation angle in degrees. Positive values mean counter-clockwise rotation (the +coordinate origin is assumed to be the top-left corner). +@param scale Isotropic scale factor. + +@sa getAffineTransform, warpAffine, transform + */ +CV_EXPORTS_W Mat getRotationMatrix2D(Point2f center, double angle, double scale); + +/** @sa getRotationMatrix2D */ +CV_EXPORTS Matx23d getRotationMatrix2D_(Point2f center, double angle, double scale); + +inline +Mat getRotationMatrix2D(Point2f center, double angle, double scale) +{ + return Mat(getRotationMatrix2D_(center, angle, scale), true); +} + +/** @brief Calculates an affine transform from three pairs of the corresponding points. + +The function calculates the \f$2 \times 3\f$ matrix of an affine transform so that: + +\f[\begin{bmatrix} x'_i \\ y'_i \end{bmatrix} = \texttt{map_matrix} \cdot \begin{bmatrix} x_i \\ y_i \\ 1 \end{bmatrix}\f] + +where + +\f[dst(i)=(x'_i,y'_i), src(i)=(x_i, y_i), i=0,1,2\f] + +@param src Coordinates of triangle vertices in the source image. +@param dst Coordinates of the corresponding triangle vertices in the destination image. + +@sa warpAffine, transform + */ +CV_EXPORTS Mat getAffineTransform( const Point2f src[], const Point2f dst[] ); + +/** @brief Inverts an affine transformation. + +The function computes an inverse affine transformation represented by \f$2 \times 3\f$ matrix M: + +\f[\begin{bmatrix} a_{11} & a_{12} & b_1 \\ a_{21} & a_{22} & b_2 \end{bmatrix}\f] + +The result is also a \f$2 \times 3\f$ matrix of the same type as M. + +@param M Original affine transformation. +@param iM Output reverse affine transformation. + */ +CV_EXPORTS_W void invertAffineTransform( InputArray M, OutputArray iM ); + +/** @brief Calculates a perspective transform from four pairs of the corresponding points. + +The function calculates the \f$3 \times 3\f$ matrix of a perspective transform so that: + +\f[\begin{bmatrix} t_i x'_i \\ t_i y'_i \\ t_i \end{bmatrix} = \texttt{map_matrix} \cdot \begin{bmatrix} x_i \\ y_i \\ 1 \end{bmatrix}\f] + +where + +\f[dst(i)=(x'_i,y'_i), src(i)=(x_i, y_i), i=0,1,2,3\f] + +@param src Coordinates of quadrangle vertices in the source image. +@param dst Coordinates of the corresponding quadrangle vertices in the destination image. +@param solveMethod method passed to cv::solve (#DecompTypes) + +@sa findHomography, warpPerspective, perspectiveTransform + */ +CV_EXPORTS_W Mat getPerspectiveTransform(InputArray src, InputArray dst, int solveMethod = DECOMP_LU); + +/** @overload */ +CV_EXPORTS Mat getPerspectiveTransform(const Point2f src[], const Point2f dst[], int solveMethod = DECOMP_LU); + + +CV_EXPORTS_W Mat getAffineTransform( InputArray src, InputArray dst ); + +/** @brief Retrieves a pixel rectangle from an image with sub-pixel accuracy. + +The function getRectSubPix extracts pixels from src: + +\f[patch(x, y) = src(x + \texttt{center.x} - ( \texttt{dst.cols} -1)*0.5, y + \texttt{center.y} - ( \texttt{dst.rows} -1)*0.5)\f] + +where the values of the pixels at non-integer coordinates are retrieved using bilinear +interpolation. Every channel of multi-channel images is processed independently. Also +the image should be a single channel or three channel image. While the center of the +rectangle must be inside the image, parts of the rectangle may be outside. + +@param image Source image. +@param patchSize Size of the extracted patch. +@param center Floating point coordinates of the center of the extracted rectangle within the +source image. The center must be inside the image. +@param patch Extracted patch that has the size patchSize and the same number of channels as src . +@param patchType Depth of the extracted pixels. By default, they have the same depth as src . + +@sa warpAffine, warpPerspective + */ +CV_EXPORTS_W void getRectSubPix( InputArray image, Size patchSize, + Point2f center, OutputArray patch, int patchType = -1 ); + +/** @example samples/cpp/polar_transforms.cpp +An example using the cv::linearPolar and cv::logPolar operations +*/ + +/** @brief Remaps an image to semilog-polar coordinates space. + +@deprecated This function produces same result as cv::warpPolar(src, dst, src.size(), center, maxRadius, flags+WARP_POLAR_LOG); + +@internal +Transform the source image using the following transformation (See @ref polar_remaps_reference_image "Polar remaps reference image d)"): +\f[\begin{array}{l} + dst( \rho , \phi ) = src(x,y) \\ + dst.size() \leftarrow src.size() +\end{array}\f] + +where +\f[\begin{array}{l} + I = (dx,dy) = (x - center.x,y - center.y) \\ + \rho = M \cdot log_e(\texttt{magnitude} (I)) ,\\ + \phi = Kangle \cdot \texttt{angle} (I) \\ +\end{array}\f] + +and +\f[\begin{array}{l} + M = src.cols / log_e(maxRadius) \\ + Kangle = src.rows / 2\Pi \\ +\end{array}\f] + +The function emulates the human "foveal" vision and can be used for fast scale and +rotation-invariant template matching, for object tracking and so forth. +@param src Source image +@param dst Destination image. It will have same size and type as src. +@param center The transformation center; where the output precision is maximal +@param M Magnitude scale parameter. It determines the radius of the bounding circle to transform too. +@param flags A combination of interpolation methods, see #InterpolationFlags + +@note +- The function can not operate in-place. +- To calculate magnitude and angle in degrees #cartToPolar is used internally thus angles are measured from 0 to 360 with accuracy about 0.3 degrees. + +@sa cv::linearPolar +@endinternal +*/ +CV_EXPORTS_W void logPolar( InputArray src, OutputArray dst, + Point2f center, double M, int flags ); + +/** @brief Remaps an image to polar coordinates space. + +@deprecated This function produces same result as cv::warpPolar(src, dst, src.size(), center, maxRadius, flags) + +@internal +Transform the source image using the following transformation (See @ref polar_remaps_reference_image "Polar remaps reference image c)"): +\f[\begin{array}{l} + dst( \rho , \phi ) = src(x,y) \\ + dst.size() \leftarrow src.size() +\end{array}\f] + +where +\f[\begin{array}{l} + I = (dx,dy) = (x - center.x,y - center.y) \\ + \rho = Kmag \cdot \texttt{magnitude} (I) ,\\ + \phi = angle \cdot \texttt{angle} (I) +\end{array}\f] + +and +\f[\begin{array}{l} + Kx = src.cols / maxRadius \\ + Ky = src.rows / 2\Pi +\end{array}\f] + + +@param src Source image +@param dst Destination image. It will have same size and type as src. +@param center The transformation center; +@param maxRadius The radius of the bounding circle to transform. It determines the inverse magnitude scale parameter too. +@param flags A combination of interpolation methods, see #InterpolationFlags + +@note +- The function can not operate in-place. +- To calculate magnitude and angle in degrees #cartToPolar is used internally thus angles are measured from 0 to 360 with accuracy about 0.3 degrees. + +@sa cv::logPolar +@endinternal +*/ +CV_EXPORTS_W void linearPolar( InputArray src, OutputArray dst, + Point2f center, double maxRadius, int flags ); + + +/** \brief Remaps an image to polar or semilog-polar coordinates space + +@anchor polar_remaps_reference_image +![Polar remaps reference](pics/polar_remap_doc.png) + +Transform the source image using the following transformation: +\f[ +dst(\rho , \phi ) = src(x,y) +\f] + +where +\f[ +\begin{array}{l} +\vec{I} = (x - center.x, \;y - center.y) \\ +\phi = Kangle \cdot \texttt{angle} (\vec{I}) \\ +\rho = \left\{\begin{matrix} +Klin \cdot \texttt{magnitude} (\vec{I}) & default \\ +Klog \cdot log_e(\texttt{magnitude} (\vec{I})) & if \; semilog \\ +\end{matrix}\right. +\end{array} +\f] + +and +\f[ +\begin{array}{l} +Kangle = dsize.height / 2\Pi \\ +Klin = dsize.width / maxRadius \\ +Klog = dsize.width / log_e(maxRadius) \\ +\end{array} +\f] + + +\par Linear vs semilog mapping + +Polar mapping can be linear or semi-log. Add one of #WarpPolarMode to `flags` to specify the polar mapping mode. + +Linear is the default mode. + +The semilog mapping emulates the human "foveal" vision that permit very high acuity on the line of sight (central vision) +in contrast to peripheral vision where acuity is minor. + +\par Option on `dsize`: + +- if both values in `dsize <=0 ` (default), +the destination image will have (almost) same area of source bounding circle: +\f[\begin{array}{l} +dsize.area \leftarrow (maxRadius^2 \cdot \Pi) \\ +dsize.width = \texttt{cvRound}(maxRadius) \\ +dsize.height = \texttt{cvRound}(maxRadius \cdot \Pi) \\ +\end{array}\f] + + +- if only `dsize.height <= 0`, +the destination image area will be proportional to the bounding circle area but scaled by `Kx * Kx`: +\f[\begin{array}{l} +dsize.height = \texttt{cvRound}(dsize.width \cdot \Pi) \\ +\end{array} +\f] + +- if both values in `dsize > 0 `, +the destination image will have the given size therefore the area of the bounding circle will be scaled to `dsize`. + + +\par Reverse mapping + +You can get reverse mapping adding #WARP_INVERSE_MAP to `flags` +\snippet polar_transforms.cpp InverseMap + +In addiction, to calculate the original coordinate from a polar mapped coordinate \f$(rho, phi)->(x, y)\f$: +\snippet polar_transforms.cpp InverseCoordinate + +@param src Source image. +@param dst Destination image. It will have same type as src. +@param dsize The destination image size (see description for valid options). +@param center The transformation center. +@param maxRadius The radius of the bounding circle to transform. It determines the inverse magnitude scale parameter too. +@param flags A combination of interpolation methods, #InterpolationFlags + #WarpPolarMode. + - Add #WARP_POLAR_LINEAR to select linear polar mapping (default) + - Add #WARP_POLAR_LOG to select semilog polar mapping + - Add #WARP_INVERSE_MAP for reverse mapping. +@note +- The function can not operate in-place. +- To calculate magnitude and angle in degrees #cartToPolar is used internally thus angles are measured from 0 to 360 with accuracy about 0.3 degrees. +- This function uses #remap. Due to current implementation limitations the size of an input and output images should be less than 32767x32767. + +@sa cv::remap +*/ +CV_EXPORTS_W void warpPolar(InputArray src, OutputArray dst, Size dsize, + Point2f center, double maxRadius, int flags); + + +//! @} imgproc_transform + +//! @addtogroup imgproc_misc +//! @{ + +/** @overload */ +CV_EXPORTS_W void integral( InputArray src, OutputArray sum, int sdepth = -1 ); + +/** @overload */ +CV_EXPORTS_AS(integral2) void integral( InputArray src, OutputArray sum, + OutputArray sqsum, int sdepth = -1, int sqdepth = -1 ); + +/** @brief Calculates the integral of an image. + +The function calculates one or more integral images for the source image as follows: + +\f[\texttt{sum} (X,Y) = \sum _{x + +Calculates the cross-power spectrum of two supplied source arrays. The arrays are padded if needed +with getOptimalDFTSize. + +The function performs the following equations: +- First it applies a Hanning window (see ) to each +image to remove possible edge effects. This window is cached until the array size changes to speed +up processing time. +- Next it computes the forward DFTs of each source array: +\f[\mathbf{G}_a = \mathcal{F}\{src_1\}, \; \mathbf{G}_b = \mathcal{F}\{src_2\}\f] +where \f$\mathcal{F}\f$ is the forward DFT. +- It then computes the cross-power spectrum of each frequency domain array: +\f[R = \frac{ \mathbf{G}_a \mathbf{G}_b^*}{|\mathbf{G}_a \mathbf{G}_b^*|}\f] +- Next the cross-correlation is converted back into the time domain via the inverse DFT: +\f[r = \mathcal{F}^{-1}\{R\}\f] +- Finally, it computes the peak location and computes a 5x5 weighted centroid around the peak to +achieve sub-pixel accuracy. +\f[(\Delta x, \Delta y) = \texttt{weightedCentroid} \{\arg \max_{(x, y)}\{r\}\}\f] +- If non-zero, the response parameter is computed as the sum of the elements of r within the 5x5 +centroid around the peak location. It is normalized to a maximum of 1 (meaning there is a single +peak) and will be smaller when there are multiple peaks. + +@param src1 Source floating point array (CV_32FC1 or CV_64FC1) +@param src2 Source floating point array (CV_32FC1 or CV_64FC1) +@param window Floating point array with windowing coefficients to reduce edge effects (optional). +@param response Signal power within the 5x5 centroid around the peak, between 0 and 1 (optional). +@returns detected phase shift (sub-pixel) between the two arrays. + +@sa dft, getOptimalDFTSize, idft, mulSpectrums createHanningWindow + */ +CV_EXPORTS_W Point2d phaseCorrelate(InputArray src1, InputArray src2, + InputArray window = noArray(), CV_OUT double* response = 0); + +/** @brief This function computes a Hanning window coefficients in two dimensions. + +See (http://en.wikipedia.org/wiki/Hann_function) and (http://en.wikipedia.org/wiki/Window_function) +for more information. + +An example is shown below: +@code + // create hanning window of size 100x100 and type CV_32F + Mat hann; + createHanningWindow(hann, Size(100, 100), CV_32F); +@endcode +@param dst Destination array to place Hann coefficients in +@param winSize The window size specifications (both width and height must be > 1) +@param type Created array type + */ +CV_EXPORTS_W void createHanningWindow(OutputArray dst, Size winSize, int type); + +//! @} imgproc_motion + +//! @addtogroup imgproc_misc +//! @{ + +/** @brief Applies a fixed-level threshold to each array element. + +The function applies fixed-level thresholding to a multiple-channel array. The function is typically +used to get a bi-level (binary) image out of a grayscale image ( #compare could be also used for +this purpose) or for removing a noise, that is, filtering out pixels with too small or too large +values. There are several types of thresholding supported by the function. They are determined by +type parameter. + +Also, the special values #THRESH_OTSU or #THRESH_TRIANGLE may be combined with one of the +above values. In these cases, the function determines the optimal threshold value using the Otsu's +or Triangle algorithm and uses it instead of the specified thresh. + +@note Currently, the Otsu's and Triangle methods are implemented only for 8-bit single-channel images. + +@param src input array (multiple-channel, 8-bit or 32-bit floating point). +@param dst output array of the same size and type and the same number of channels as src. +@param thresh threshold value. +@param maxval maximum value to use with the #THRESH_BINARY and #THRESH_BINARY_INV thresholding +types. +@param type thresholding type (see #ThresholdTypes). +@return the computed threshold value if Otsu's or Triangle methods used. + +@sa adaptiveThreshold, findContours, compare, min, max + */ +CV_EXPORTS_W double threshold( InputArray src, OutputArray dst, + double thresh, double maxval, int type ); + + +/** @brief Applies an adaptive threshold to an array. + +The function transforms a grayscale image to a binary image according to the formulae: +- **THRESH_BINARY** + \f[dst(x,y) = \fork{\texttt{maxValue}}{if \(src(x,y) > T(x,y)\)}{0}{otherwise}\f] +- **THRESH_BINARY_INV** + \f[dst(x,y) = \fork{0}{if \(src(x,y) > T(x,y)\)}{\texttt{maxValue}}{otherwise}\f] +where \f$T(x,y)\f$ is a threshold calculated individually for each pixel (see adaptiveMethod parameter). + +The function can process the image in-place. + +@param src Source 8-bit single-channel image. +@param dst Destination image of the same size and the same type as src. +@param maxValue Non-zero value assigned to the pixels for which the condition is satisfied +@param adaptiveMethod Adaptive thresholding algorithm to use, see #AdaptiveThresholdTypes. +The #BORDER_REPLICATE | #BORDER_ISOLATED is used to process boundaries. +@param thresholdType Thresholding type that must be either #THRESH_BINARY or #THRESH_BINARY_INV, +see #ThresholdTypes. +@param blockSize Size of a pixel neighborhood that is used to calculate a threshold value for the +pixel: 3, 5, 7, and so on. +@param C Constant subtracted from the mean or weighted mean (see the details below). Normally, it +is positive but may be zero or negative as well. + +@sa threshold, blur, GaussianBlur + */ +CV_EXPORTS_W void adaptiveThreshold( InputArray src, OutputArray dst, + double maxValue, int adaptiveMethod, + int thresholdType, int blockSize, double C ); + +//! @} imgproc_misc + +//! @addtogroup imgproc_filter +//! @{ + +/** @example samples/cpp/tutorial_code/ImgProc/Pyramids/Pyramids.cpp +An example using pyrDown and pyrUp functions +*/ + +/** @brief Blurs an image and downsamples it. + +By default, size of the output image is computed as `Size((src.cols+1)/2, (src.rows+1)/2)`, but in +any case, the following conditions should be satisfied: + +\f[\begin{array}{l} | \texttt{dstsize.width} *2-src.cols| \leq 2 \\ | \texttt{dstsize.height} *2-src.rows| \leq 2 \end{array}\f] + +The function performs the downsampling step of the Gaussian pyramid construction. First, it +convolves the source image with the kernel: + +\f[\frac{1}{256} \begin{bmatrix} 1 & 4 & 6 & 4 & 1 \\ 4 & 16 & 24 & 16 & 4 \\ 6 & 24 & 36 & 24 & 6 \\ 4 & 16 & 24 & 16 & 4 \\ 1 & 4 & 6 & 4 & 1 \end{bmatrix}\f] + +Then, it downsamples the image by rejecting even rows and columns. + +@param src input image. +@param dst output image; it has the specified size and the same type as src. +@param dstsize size of the output image. +@param borderType Pixel extrapolation method, see #BorderTypes (#BORDER_CONSTANT isn't supported) + */ +CV_EXPORTS_W void pyrDown( InputArray src, OutputArray dst, + const Size& dstsize = Size(), int borderType = BORDER_DEFAULT ); + +/** @brief Upsamples an image and then blurs it. + +By default, size of the output image is computed as `Size(src.cols\*2, (src.rows\*2)`, but in any +case, the following conditions should be satisfied: + +\f[\begin{array}{l} | \texttt{dstsize.width} -src.cols*2| \leq ( \texttt{dstsize.width} \mod 2) \\ | \texttt{dstsize.height} -src.rows*2| \leq ( \texttt{dstsize.height} \mod 2) \end{array}\f] + +The function performs the upsampling step of the Gaussian pyramid construction, though it can +actually be used to construct the Laplacian pyramid. First, it upsamples the source image by +injecting even zero rows and columns and then convolves the result with the same kernel as in +pyrDown multiplied by 4. + +@param src input image. +@param dst output image. It has the specified size and the same type as src . +@param dstsize size of the output image. +@param borderType Pixel extrapolation method, see #BorderTypes (only #BORDER_DEFAULT is supported) + */ +CV_EXPORTS_W void pyrUp( InputArray src, OutputArray dst, + const Size& dstsize = Size(), int borderType = BORDER_DEFAULT ); + +/** @brief Constructs the Gaussian pyramid for an image. + +The function constructs a vector of images and builds the Gaussian pyramid by recursively applying +pyrDown to the previously built pyramid layers, starting from `dst[0]==src`. + +@param src Source image. Check pyrDown for the list of supported types. +@param dst Destination vector of maxlevel+1 images of the same type as src. dst[0] will be the +same as src. dst[1] is the next pyramid layer, a smoothed and down-sized src, and so on. +@param maxlevel 0-based index of the last (the smallest) pyramid layer. It must be non-negative. +@param borderType Pixel extrapolation method, see #BorderTypes (#BORDER_CONSTANT isn't supported) + */ +CV_EXPORTS void buildPyramid( InputArray src, OutputArrayOfArrays dst, + int maxlevel, int borderType = BORDER_DEFAULT ); + +//! @} imgproc_filter + +//! @addtogroup imgproc_hist +//! @{ + +/** @example samples/cpp/demhist.cpp +An example for creating histograms of an image +*/ + +/** @brief Calculates a histogram of a set of arrays. + +The function cv::calcHist calculates the histogram of one or more arrays. The elements of a tuple used +to increment a histogram bin are taken from the corresponding input arrays at the same location. The +sample below shows how to compute a 2D Hue-Saturation histogram for a color image. : +@include snippets/imgproc_calcHist.cpp + +@param images Source arrays. They all should have the same depth, CV_8U, CV_16U or CV_32F , and the same +size. Each of them can have an arbitrary number of channels. +@param nimages Number of source images. +@param channels List of the dims channels used to compute the histogram. The first array channels +are numerated from 0 to images[0].channels()-1 , the second array channels are counted from +images[0].channels() to images[0].channels() + images[1].channels()-1, and so on. +@param mask Optional mask. If the matrix is not empty, it must be an 8-bit array of the same size +as images[i] . The non-zero mask elements mark the array elements counted in the histogram. +@param hist Output histogram, which is a dense or sparse dims -dimensional array. +@param dims Histogram dimensionality that must be positive and not greater than CV_MAX_DIMS +(equal to 32 in the current OpenCV version). +@param histSize Array of histogram sizes in each dimension. +@param ranges Array of the dims arrays of the histogram bin boundaries in each dimension. When the +histogram is uniform ( uniform =true), then for each dimension i it is enough to specify the lower +(inclusive) boundary \f$L_0\f$ of the 0-th histogram bin and the upper (exclusive) boundary +\f$U_{\texttt{histSize}[i]-1}\f$ for the last histogram bin histSize[i]-1 . That is, in case of a +uniform histogram each of ranges[i] is an array of 2 elements. When the histogram is not uniform ( +uniform=false ), then each of ranges[i] contains histSize[i]+1 elements: +\f$L_0, U_0=L_1, U_1=L_2, ..., U_{\texttt{histSize[i]}-2}=L_{\texttt{histSize[i]}-1}, U_{\texttt{histSize[i]}-1}\f$ +. The array elements, that are not between \f$L_0\f$ and \f$U_{\texttt{histSize[i]}-1}\f$ , are not +counted in the histogram. +@param uniform Flag indicating whether the histogram is uniform or not (see above). +@param accumulate Accumulation flag. If it is set, the histogram is not cleared in the beginning +when it is allocated. This feature enables you to compute a single histogram from several sets of +arrays, or to update the histogram in time. +*/ +CV_EXPORTS void calcHist( const Mat* images, int nimages, + const int* channels, InputArray mask, + OutputArray hist, int dims, const int* histSize, + const float** ranges, bool uniform = true, bool accumulate = false ); + +/** @overload + +this variant uses %SparseMat for output +*/ +CV_EXPORTS void calcHist( const Mat* images, int nimages, + const int* channels, InputArray mask, + SparseMat& hist, int dims, + const int* histSize, const float** ranges, + bool uniform = true, bool accumulate = false ); + +/** @overload */ +CV_EXPORTS_W void calcHist( InputArrayOfArrays images, + const std::vector& channels, + InputArray mask, OutputArray hist, + const std::vector& histSize, + const std::vector& ranges, + bool accumulate = false ); + +/** @brief Calculates the back projection of a histogram. + +The function cv::calcBackProject calculates the back project of the histogram. That is, similarly to +#calcHist , at each location (x, y) the function collects the values from the selected channels +in the input images and finds the corresponding histogram bin. But instead of incrementing it, the +function reads the bin value, scales it by scale , and stores in backProject(x,y) . In terms of +statistics, the function computes probability of each element value in respect with the empirical +probability distribution represented by the histogram. See how, for example, you can find and track +a bright-colored object in a scene: + +- Before tracking, show the object to the camera so that it covers almost the whole frame. +Calculate a hue histogram. The histogram may have strong maximums, corresponding to the dominant +colors in the object. + +- When tracking, calculate a back projection of a hue plane of each input video frame using that +pre-computed histogram. Threshold the back projection to suppress weak colors. It may also make +sense to suppress pixels with non-sufficient color saturation and too dark or too bright pixels. + +- Find connected components in the resulting picture and choose, for example, the largest +component. + +This is an approximate algorithm of the CamShift color object tracker. + +@param images Source arrays. They all should have the same depth, CV_8U, CV_16U or CV_32F , and the same +size. Each of them can have an arbitrary number of channels. +@param nimages Number of source images. +@param channels The list of channels used to compute the back projection. The number of channels +must match the histogram dimensionality. The first array channels are numerated from 0 to +images[0].channels()-1 , the second array channels are counted from images[0].channels() to +images[0].channels() + images[1].channels()-1, and so on. +@param hist Input histogram that can be dense or sparse. +@param backProject Destination back projection array that is a single-channel array of the same +size and depth as images[0] . +@param ranges Array of arrays of the histogram bin boundaries in each dimension. See #calcHist . +@param scale Optional scale factor for the output back projection. +@param uniform Flag indicating whether the histogram is uniform or not (see above). + +@sa calcHist, compareHist + */ +CV_EXPORTS void calcBackProject( const Mat* images, int nimages, + const int* channels, InputArray hist, + OutputArray backProject, const float** ranges, + double scale = 1, bool uniform = true ); + +/** @overload */ +CV_EXPORTS void calcBackProject( const Mat* images, int nimages, + const int* channels, const SparseMat& hist, + OutputArray backProject, const float** ranges, + double scale = 1, bool uniform = true ); + +/** @overload */ +CV_EXPORTS_W void calcBackProject( InputArrayOfArrays images, const std::vector& channels, + InputArray hist, OutputArray dst, + const std::vector& ranges, + double scale ); + +/** @brief Compares two histograms. + +The function cv::compareHist compares two dense or two sparse histograms using the specified method. + +The function returns \f$d(H_1, H_2)\f$ . + +While the function works well with 1-, 2-, 3-dimensional dense histograms, it may not be suitable +for high-dimensional sparse histograms. In such histograms, because of aliasing and sampling +problems, the coordinates of non-zero histogram bins can slightly shift. To compare such histograms +or more general sparse configurations of weighted points, consider using the #EMD function. + +@param H1 First compared histogram. +@param H2 Second compared histogram of the same size as H1 . +@param method Comparison method, see #HistCompMethods + */ +CV_EXPORTS_W double compareHist( InputArray H1, InputArray H2, int method ); + +/** @overload */ +CV_EXPORTS double compareHist( const SparseMat& H1, const SparseMat& H2, int method ); + +/** @brief Equalizes the histogram of a grayscale image. + +The function equalizes the histogram of the input image using the following algorithm: + +- Calculate the histogram \f$H\f$ for src . +- Normalize the histogram so that the sum of histogram bins is 255. +- Compute the integral of the histogram: +\f[H'_i = \sum _{0 \le j < i} H(j)\f] +- Transform the image using \f$H'\f$ as a look-up table: \f$\texttt{dst}(x,y) = H'(\texttt{src}(x,y))\f$ + +The algorithm normalizes the brightness and increases the contrast of the image. + +@param src Source 8-bit single channel image. +@param dst Destination image of the same size and type as src . + */ +CV_EXPORTS_W void equalizeHist( InputArray src, OutputArray dst ); + +/** @brief Creates a smart pointer to a cv::CLAHE class and initializes it. + +@param clipLimit Threshold for contrast limiting. +@param tileGridSize Size of grid for histogram equalization. Input image will be divided into +equally sized rectangular tiles. tileGridSize defines the number of tiles in row and column. + */ +CV_EXPORTS_W Ptr createCLAHE(double clipLimit = 40.0, Size tileGridSize = Size(8, 8)); + +/** @brief Computes the "minimal work" distance between two weighted point configurations. + +The function computes the earth mover distance and/or a lower boundary of the distance between the +two weighted point configurations. One of the applications described in @cite RubnerSept98, +@cite Rubner2000 is multi-dimensional histogram comparison for image retrieval. EMD is a transportation +problem that is solved using some modification of a simplex algorithm, thus the complexity is +exponential in the worst case, though, on average it is much faster. In the case of a real metric +the lower boundary can be calculated even faster (using linear-time algorithm) and it can be used +to determine roughly whether the two signatures are far enough so that they cannot relate to the +same object. + +@param signature1 First signature, a \f$\texttt{size1}\times \texttt{dims}+1\f$ floating-point matrix. +Each row stores the point weight followed by the point coordinates. The matrix is allowed to have +a single column (weights only) if the user-defined cost matrix is used. The weights must be +non-negative and have at least one non-zero value. +@param signature2 Second signature of the same format as signature1 , though the number of rows +may be different. The total weights may be different. In this case an extra "dummy" point is added +to either signature1 or signature2. The weights must be non-negative and have at least one non-zero +value. +@param distType Used metric. See #DistanceTypes. +@param cost User-defined \f$\texttt{size1}\times \texttt{size2}\f$ cost matrix. Also, if a cost matrix +is used, lower boundary lowerBound cannot be calculated because it needs a metric function. +@param lowerBound Optional input/output parameter: lower boundary of a distance between the two +signatures that is a distance between mass centers. The lower boundary may not be calculated if +the user-defined cost matrix is used, the total weights of point configurations are not equal, or +if the signatures consist of weights only (the signature matrices have a single column). You +**must** initialize \*lowerBound . If the calculated distance between mass centers is greater or +equal to \*lowerBound (it means that the signatures are far enough), the function does not +calculate EMD. In any case \*lowerBound is set to the calculated distance between mass centers on +return. Thus, if you want to calculate both distance between mass centers and EMD, \*lowerBound +should be set to 0. +@param flow Resultant \f$\texttt{size1} \times \texttt{size2}\f$ flow matrix: \f$\texttt{flow}_{i,j}\f$ is +a flow from \f$i\f$ -th point of signature1 to \f$j\f$ -th point of signature2 . + */ +CV_EXPORTS float EMD( InputArray signature1, InputArray signature2, + int distType, InputArray cost=noArray(), + float* lowerBound = 0, OutputArray flow = noArray() ); + +CV_EXPORTS_AS(EMD) float wrapperEMD( InputArray signature1, InputArray signature2, + int distType, InputArray cost=noArray(), + CV_IN_OUT Ptr lowerBound = Ptr(), OutputArray flow = noArray() ); + +//! @} imgproc_hist + +/** @example samples/cpp/watershed.cpp +An example using the watershed algorithm +*/ + +/** @brief Performs a marker-based image segmentation using the watershed algorithm. + +The function implements one of the variants of watershed, non-parametric marker-based segmentation +algorithm, described in @cite Meyer92 . + +Before passing the image to the function, you have to roughly outline the desired regions in the +image markers with positive (\>0) indices. So, every region is represented as one or more connected +components with the pixel values 1, 2, 3, and so on. Such markers can be retrieved from a binary +mask using #findContours and #drawContours (see the watershed.cpp demo). The markers are "seeds" of +the future image regions. All the other pixels in markers , whose relation to the outlined regions +is not known and should be defined by the algorithm, should be set to 0's. In the function output, +each pixel in markers is set to a value of the "seed" components or to -1 at boundaries between the +regions. + +@note Any two neighbor connected components are not necessarily separated by a watershed boundary +(-1's pixels); for example, they can touch each other in the initial marker image passed to the +function. + +@param image Input 8-bit 3-channel image. +@param markers Input/output 32-bit single-channel image (map) of markers. It should have the same +size as image . + +@sa findContours + +@ingroup imgproc_misc + */ +CV_EXPORTS_W void watershed( InputArray image, InputOutputArray markers ); + +//! @addtogroup imgproc_filter +//! @{ + +/** @brief Performs initial step of meanshift segmentation of an image. + +The function implements the filtering stage of meanshift segmentation, that is, the output of the +function is the filtered "posterized" image with color gradients and fine-grain texture flattened. +At every pixel (X,Y) of the input image (or down-sized input image, see below) the function executes +meanshift iterations, that is, the pixel (X,Y) neighborhood in the joint space-color hyperspace is +considered: + +\f[(x,y): X- \texttt{sp} \le x \le X+ \texttt{sp} , Y- \texttt{sp} \le y \le Y+ \texttt{sp} , ||(R,G,B)-(r,g,b)|| \le \texttt{sr}\f] + +where (R,G,B) and (r,g,b) are the vectors of color components at (X,Y) and (x,y), respectively +(though, the algorithm does not depend on the color space used, so any 3-component color space can +be used instead). Over the neighborhood the average spatial value (X',Y') and average color vector +(R',G',B') are found and they act as the neighborhood center on the next iteration: + +\f[(X,Y)~(X',Y'), (R,G,B)~(R',G',B').\f] + +After the iterations over, the color components of the initial pixel (that is, the pixel from where +the iterations started) are set to the final value (average color at the last iteration): + +\f[I(X,Y) <- (R*,G*,B*)\f] + +When maxLevel \> 0, the gaussian pyramid of maxLevel+1 levels is built, and the above procedure is +run on the smallest layer first. After that, the results are propagated to the larger layer and the +iterations are run again only on those pixels where the layer colors differ by more than sr from the +lower-resolution layer of the pyramid. That makes boundaries of color regions sharper. Note that the +results will be actually different from the ones obtained by running the meanshift procedure on the +whole original image (i.e. when maxLevel==0). + +@param src The source 8-bit, 3-channel image. +@param dst The destination image of the same format and the same size as the source. +@param sp The spatial window radius. +@param sr The color window radius. +@param maxLevel Maximum level of the pyramid for the segmentation. +@param termcrit Termination criteria: when to stop meanshift iterations. + */ +CV_EXPORTS_W void pyrMeanShiftFiltering( InputArray src, OutputArray dst, + double sp, double sr, int maxLevel = 1, + TermCriteria termcrit=TermCriteria(TermCriteria::MAX_ITER+TermCriteria::EPS,5,1) ); + +//! @} + +//! @addtogroup imgproc_misc +//! @{ + +/** @example samples/cpp/grabcut.cpp +An example using the GrabCut algorithm +![Sample Screenshot](grabcut_output1.jpg) +*/ + +/** @brief Runs the GrabCut algorithm. + +The function implements the [GrabCut image segmentation algorithm](http://en.wikipedia.org/wiki/GrabCut). + +@param img Input 8-bit 3-channel image. +@param mask Input/output 8-bit single-channel mask. The mask is initialized by the function when +mode is set to #GC_INIT_WITH_RECT. Its elements may have one of the #GrabCutClasses. +@param rect ROI containing a segmented object. The pixels outside of the ROI are marked as +"obvious background". The parameter is only used when mode==#GC_INIT_WITH_RECT . +@param bgdModel Temporary array for the background model. Do not modify it while you are +processing the same image. +@param fgdModel Temporary arrays for the foreground model. Do not modify it while you are +processing the same image. +@param iterCount Number of iterations the algorithm should make before returning the result. Note +that the result can be refined with further calls with mode==#GC_INIT_WITH_MASK or +mode==GC_EVAL . +@param mode Operation mode that could be one of the #GrabCutModes + */ +CV_EXPORTS_W void grabCut( InputArray img, InputOutputArray mask, Rect rect, + InputOutputArray bgdModel, InputOutputArray fgdModel, + int iterCount, int mode = GC_EVAL ); + +/** @example samples/cpp/distrans.cpp +An example on using the distance transform +*/ + +/** @brief Calculates the distance to the closest zero pixel for each pixel of the source image. + +The function cv::distanceTransform calculates the approximate or precise distance from every binary +image pixel to the nearest zero pixel. For zero image pixels, the distance will obviously be zero. + +When maskSize == #DIST_MASK_PRECISE and distanceType == #DIST_L2 , the function runs the +algorithm described in @cite Felzenszwalb04 . This algorithm is parallelized with the TBB library. + +In other cases, the algorithm @cite Borgefors86 is used. This means that for a pixel the function +finds the shortest path to the nearest zero pixel consisting of basic shifts: horizontal, vertical, +diagonal, or knight's move (the latest is available for a \f$5\times 5\f$ mask). The overall +distance is calculated as a sum of these basic distances. Since the distance function should be +symmetric, all of the horizontal and vertical shifts must have the same cost (denoted as a ), all +the diagonal shifts must have the same cost (denoted as `b`), and all knight's moves must have the +same cost (denoted as `c`). For the #DIST_C and #DIST_L1 types, the distance is calculated +precisely, whereas for #DIST_L2 (Euclidean distance) the distance can be calculated only with a +relative error (a \f$5\times 5\f$ mask gives more accurate results). For `a`,`b`, and `c`, OpenCV +uses the values suggested in the original paper: +- DIST_L1: `a = 1, b = 2` +- DIST_L2: + - `3 x 3`: `a=0.955, b=1.3693` + - `5 x 5`: `a=1, b=1.4, c=2.1969` +- DIST_C: `a = 1, b = 1` + +Typically, for a fast, coarse distance estimation #DIST_L2, a \f$3\times 3\f$ mask is used. For a +more accurate distance estimation #DIST_L2, a \f$5\times 5\f$ mask or the precise algorithm is used. +Note that both the precise and the approximate algorithms are linear on the number of pixels. + +This variant of the function does not only compute the minimum distance for each pixel \f$(x, y)\f$ +but also identifies the nearest connected component consisting of zero pixels +(labelType==#DIST_LABEL_CCOMP) or the nearest zero pixel (labelType==#DIST_LABEL_PIXEL). Index of the +component/pixel is stored in `labels(x, y)`. When labelType==#DIST_LABEL_CCOMP, the function +automatically finds connected components of zero pixels in the input image and marks them with +distinct labels. When labelType==#DIST_LABEL_PIXEL, the function scans through the input image and +marks all the zero pixels with distinct labels. + +In this mode, the complexity is still linear. That is, the function provides a very fast way to +compute the Voronoi diagram for a binary image. Currently, the second variant can use only the +approximate distance transform algorithm, i.e. maskSize=#DIST_MASK_PRECISE is not supported +yet. + +@param src 8-bit, single-channel (binary) source image. +@param dst Output image with calculated distances. It is a 8-bit or 32-bit floating-point, +single-channel image of the same size as src. +@param labels Output 2D array of labels (the discrete Voronoi diagram). It has the type +CV_32SC1 and the same size as src. +@param distanceType Type of distance, see #DistanceTypes +@param maskSize Size of the distance transform mask, see #DistanceTransformMasks. +#DIST_MASK_PRECISE is not supported by this variant. In case of the #DIST_L1 or #DIST_C distance type, +the parameter is forced to 3 because a \f$3\times 3\f$ mask gives the same result as \f$5\times +5\f$ or any larger aperture. +@param labelType Type of the label array to build, see #DistanceTransformLabelTypes. + */ +CV_EXPORTS_AS(distanceTransformWithLabels) void distanceTransform( InputArray src, OutputArray dst, + OutputArray labels, int distanceType, int maskSize, + int labelType = DIST_LABEL_CCOMP ); + +/** @overload +@param src 8-bit, single-channel (binary) source image. +@param dst Output image with calculated distances. It is a 8-bit or 32-bit floating-point, +single-channel image of the same size as src . +@param distanceType Type of distance, see #DistanceTypes +@param maskSize Size of the distance transform mask, see #DistanceTransformMasks. In case of the +#DIST_L1 or #DIST_C distance type, the parameter is forced to 3 because a \f$3\times 3\f$ mask gives +the same result as \f$5\times 5\f$ or any larger aperture. +@param dstType Type of output image. It can be CV_8U or CV_32F. Type CV_8U can be used only for +the first variant of the function and distanceType == #DIST_L1. +*/ +CV_EXPORTS_W void distanceTransform( InputArray src, OutputArray dst, + int distanceType, int maskSize, int dstType=CV_32F); + +/** @example samples/cpp/ffilldemo.cpp +An example using the FloodFill technique +*/ + +/** @overload + +variant without `mask` parameter +*/ +CV_EXPORTS int floodFill( InputOutputArray image, + Point seedPoint, Scalar newVal, CV_OUT Rect* rect = 0, + Scalar loDiff = Scalar(), Scalar upDiff = Scalar(), + int flags = 4 ); + +/** @brief Fills a connected component with the given color. + +The function cv::floodFill fills a connected component starting from the seed point with the specified +color. The connectivity is determined by the color/brightness closeness of the neighbor pixels. The +pixel at \f$(x,y)\f$ is considered to belong to the repainted domain if: + +- in case of a grayscale image and floating range +\f[\texttt{src} (x',y')- \texttt{loDiff} \leq \texttt{src} (x,y) \leq \texttt{src} (x',y')+ \texttt{upDiff}\f] + + +- in case of a grayscale image and fixed range +\f[\texttt{src} ( \texttt{seedPoint} .x, \texttt{seedPoint} .y)- \texttt{loDiff} \leq \texttt{src} (x,y) \leq \texttt{src} ( \texttt{seedPoint} .x, \texttt{seedPoint} .y)+ \texttt{upDiff}\f] + + +- in case of a color image and floating range +\f[\texttt{src} (x',y')_r- \texttt{loDiff} _r \leq \texttt{src} (x,y)_r \leq \texttt{src} (x',y')_r+ \texttt{upDiff} _r,\f] +\f[\texttt{src} (x',y')_g- \texttt{loDiff} _g \leq \texttt{src} (x,y)_g \leq \texttt{src} (x',y')_g+ \texttt{upDiff} _g\f] +and +\f[\texttt{src} (x',y')_b- \texttt{loDiff} _b \leq \texttt{src} (x,y)_b \leq \texttt{src} (x',y')_b+ \texttt{upDiff} _b\f] + + +- in case of a color image and fixed range +\f[\texttt{src} ( \texttt{seedPoint} .x, \texttt{seedPoint} .y)_r- \texttt{loDiff} _r \leq \texttt{src} (x,y)_r \leq \texttt{src} ( \texttt{seedPoint} .x, \texttt{seedPoint} .y)_r+ \texttt{upDiff} _r,\f] +\f[\texttt{src} ( \texttt{seedPoint} .x, \texttt{seedPoint} .y)_g- \texttt{loDiff} _g \leq \texttt{src} (x,y)_g \leq \texttt{src} ( \texttt{seedPoint} .x, \texttt{seedPoint} .y)_g+ \texttt{upDiff} _g\f] +and +\f[\texttt{src} ( \texttt{seedPoint} .x, \texttt{seedPoint} .y)_b- \texttt{loDiff} _b \leq \texttt{src} (x,y)_b \leq \texttt{src} ( \texttt{seedPoint} .x, \texttt{seedPoint} .y)_b+ \texttt{upDiff} _b\f] + + +where \f$src(x',y')\f$ is the value of one of pixel neighbors that is already known to belong to the +component. That is, to be added to the connected component, a color/brightness of the pixel should +be close enough to: +- Color/brightness of one of its neighbors that already belong to the connected component in case +of a floating range. +- Color/brightness of the seed point in case of a fixed range. + +Use these functions to either mark a connected component with the specified color in-place, or build +a mask and then extract the contour, or copy the region to another image, and so on. + +@param image Input/output 1- or 3-channel, 8-bit, or floating-point image. It is modified by the +function unless the #FLOODFILL_MASK_ONLY flag is set in the second variant of the function. See +the details below. +@param mask Operation mask that should be a single-channel 8-bit image, 2 pixels wider and 2 pixels +taller than image. Since this is both an input and output parameter, you must take responsibility +of initializing it. Flood-filling cannot go across non-zero pixels in the input mask. For example, +an edge detector output can be used as a mask to stop filling at edges. On output, pixels in the +mask corresponding to filled pixels in the image are set to 1 or to the a value specified in flags +as described below. Additionally, the function fills the border of the mask with ones to simplify +internal processing. It is therefore possible to use the same mask in multiple calls to the function +to make sure the filled areas do not overlap. +@param seedPoint Starting point. +@param newVal New value of the repainted domain pixels. +@param loDiff Maximal lower brightness/color difference between the currently observed pixel and +one of its neighbors belonging to the component, or a seed pixel being added to the component. +@param upDiff Maximal upper brightness/color difference between the currently observed pixel and +one of its neighbors belonging to the component, or a seed pixel being added to the component. +@param rect Optional output parameter set by the function to the minimum bounding rectangle of the +repainted domain. +@param flags Operation flags. The first 8 bits contain a connectivity value. The default value of +4 means that only the four nearest neighbor pixels (those that share an edge) are considered. A +connectivity value of 8 means that the eight nearest neighbor pixels (those that share a corner) +will be considered. The next 8 bits (8-16) contain a value between 1 and 255 with which to fill +the mask (the default value is 1). For example, 4 | ( 255 \<\< 8 ) will consider 4 nearest +neighbours and fill the mask with a value of 255. The following additional options occupy higher +bits and therefore may be further combined with the connectivity and mask fill values using +bit-wise or (|), see #FloodFillFlags. + +@note Since the mask is larger than the filled image, a pixel \f$(x, y)\f$ in image corresponds to the +pixel \f$(x+1, y+1)\f$ in the mask . + +@sa findContours + */ +CV_EXPORTS_W int floodFill( InputOutputArray image, InputOutputArray mask, + Point seedPoint, Scalar newVal, CV_OUT Rect* rect=0, + Scalar loDiff = Scalar(), Scalar upDiff = Scalar(), + int flags = 4 ); + +//! Performs linear blending of two images: +//! \f[ \texttt{dst}(i,j) = \texttt{weights1}(i,j)*\texttt{src1}(i,j) + \texttt{weights2}(i,j)*\texttt{src2}(i,j) \f] +//! @param src1 It has a type of CV_8UC(n) or CV_32FC(n), where n is a positive integer. +//! @param src2 It has the same type and size as src1. +//! @param weights1 It has a type of CV_32FC1 and the same size with src1. +//! @param weights2 It has a type of CV_32FC1 and the same size with src1. +//! @param dst It is created if it does not have the same size and type with src1. +CV_EXPORTS void blendLinear(InputArray src1, InputArray src2, InputArray weights1, InputArray weights2, OutputArray dst); + +//! @} imgproc_misc + +//! @addtogroup imgproc_color_conversions +//! @{ + +/** @brief Converts an image from one color space to another. + +The function converts an input image from one color space to another. In case of a transformation +to-from RGB color space, the order of the channels should be specified explicitly (RGB or BGR). Note +that the default color format in OpenCV is often referred to as RGB but it is actually BGR (the +bytes are reversed). So the first byte in a standard (24-bit) color image will be an 8-bit Blue +component, the second byte will be Green, and the third byte will be Red. The fourth, fifth, and +sixth bytes would then be the second pixel (Blue, then Green, then Red), and so on. + +The conventional ranges for R, G, and B channel values are: +- 0 to 255 for CV_8U images +- 0 to 65535 for CV_16U images +- 0 to 1 for CV_32F images + +In case of linear transformations, the range does not matter. But in case of a non-linear +transformation, an input RGB image should be normalized to the proper value range to get the correct +results, for example, for RGB \f$\rightarrow\f$ L\*u\*v\* transformation. For example, if you have a +32-bit floating-point image directly converted from an 8-bit image without any scaling, then it will +have the 0..255 value range instead of 0..1 assumed by the function. So, before calling #cvtColor , +you need first to scale the image down: +@code + img *= 1./255; + cvtColor(img, img, COLOR_BGR2Luv); +@endcode +If you use #cvtColor with 8-bit images, the conversion will have some information lost. For many +applications, this will not be noticeable but it is recommended to use 32-bit images in applications +that need the full range of colors or that convert an image before an operation and then convert +back. + +If conversion adds the alpha channel, its value will set to the maximum of corresponding channel +range: 255 for CV_8U, 65535 for CV_16U, 1 for CV_32F. + +@param src input image: 8-bit unsigned, 16-bit unsigned ( CV_16UC... ), or single-precision +floating-point. +@param dst output image of the same size and depth as src. +@param code color space conversion code (see #ColorConversionCodes). +@param dstCn number of channels in the destination image; if the parameter is 0, the number of the +channels is derived automatically from src and code. + +@see @ref imgproc_color_conversions + */ +CV_EXPORTS_W void cvtColor( InputArray src, OutputArray dst, int code, int dstCn = 0 ); + +/** @brief Converts an image from one color space to another where the source image is +stored in two planes. + +This function only supports YUV420 to RGB conversion as of now. + +@param src1: 8-bit image (#CV_8U) of the Y plane. +@param src2: image containing interleaved U/V plane. +@param dst: output image. +@param code: Specifies the type of conversion. It can take any of the following values: +- #COLOR_YUV2BGR_NV12 +- #COLOR_YUV2RGB_NV12 +- #COLOR_YUV2BGRA_NV12 +- #COLOR_YUV2RGBA_NV12 +- #COLOR_YUV2BGR_NV21 +- #COLOR_YUV2RGB_NV21 +- #COLOR_YUV2BGRA_NV21 +- #COLOR_YUV2RGBA_NV21 +*/ +CV_EXPORTS_W void cvtColorTwoPlane( InputArray src1, InputArray src2, OutputArray dst, int code ); + +/** @brief main function for all demosaicing processes + +@param src input image: 8-bit unsigned or 16-bit unsigned. +@param dst output image of the same size and depth as src. +@param code Color space conversion code (see the description below). +@param dstCn number of channels in the destination image; if the parameter is 0, the number of the +channels is derived automatically from src and code. + +The function can do the following transformations: + +- Demosaicing using bilinear interpolation + + #COLOR_BayerBG2BGR , #COLOR_BayerGB2BGR , #COLOR_BayerRG2BGR , #COLOR_BayerGR2BGR + + #COLOR_BayerBG2GRAY , #COLOR_BayerGB2GRAY , #COLOR_BayerRG2GRAY , #COLOR_BayerGR2GRAY + +- Demosaicing using Variable Number of Gradients. + + #COLOR_BayerBG2BGR_VNG , #COLOR_BayerGB2BGR_VNG , #COLOR_BayerRG2BGR_VNG , #COLOR_BayerGR2BGR_VNG + +- Edge-Aware Demosaicing. + + #COLOR_BayerBG2BGR_EA , #COLOR_BayerGB2BGR_EA , #COLOR_BayerRG2BGR_EA , #COLOR_BayerGR2BGR_EA + +- Demosaicing with alpha channel + + #COLOR_BayerBG2BGRA , #COLOR_BayerGB2BGRA , #COLOR_BayerRG2BGRA , #COLOR_BayerGR2BGRA + +@sa cvtColor +*/ +CV_EXPORTS_W void demosaicing(InputArray src, OutputArray dst, int code, int dstCn = 0); + +//! @} imgproc_color_conversions + +//! @addtogroup imgproc_shape +//! @{ + +/** @brief Calculates all of the moments up to the third order of a polygon or rasterized shape. + +The function computes moments, up to the 3rd order, of a vector shape or a rasterized shape. The +results are returned in the structure cv::Moments. + +@param array Raster image (single-channel, 8-bit or floating-point 2D array) or an array ( +\f$1 \times N\f$ or \f$N \times 1\f$ ) of 2D points (Point or Point2f ). +@param binaryImage If it is true, all non-zero image pixels are treated as 1's. The parameter is +used for images only. +@returns moments. + +@note Only applicable to contour moments calculations from Python bindings: Note that the numpy +type for the input array should be either np.int32 or np.float32. + +@sa contourArea, arcLength + */ +CV_EXPORTS_W Moments moments( InputArray array, bool binaryImage = false ); + +/** @brief Calculates seven Hu invariants. + +The function calculates seven Hu invariants (introduced in @cite Hu62; see also +) defined as: + +\f[\begin{array}{l} hu[0]= \eta _{20}+ \eta _{02} \\ hu[1]=( \eta _{20}- \eta _{02})^{2}+4 \eta _{11}^{2} \\ hu[2]=( \eta _{30}-3 \eta _{12})^{2}+ (3 \eta _{21}- \eta _{03})^{2} \\ hu[3]=( \eta _{30}+ \eta _{12})^{2}+ ( \eta _{21}+ \eta _{03})^{2} \\ hu[4]=( \eta _{30}-3 \eta _{12})( \eta _{30}+ \eta _{12})[( \eta _{30}+ \eta _{12})^{2}-3( \eta _{21}+ \eta _{03})^{2}]+(3 \eta _{21}- \eta _{03})( \eta _{21}+ \eta _{03})[3( \eta _{30}+ \eta _{12})^{2}-( \eta _{21}+ \eta _{03})^{2}] \\ hu[5]=( \eta _{20}- \eta _{02})[( \eta _{30}+ \eta _{12})^{2}- ( \eta _{21}+ \eta _{03})^{2}]+4 \eta _{11}( \eta _{30}+ \eta _{12})( \eta _{21}+ \eta _{03}) \\ hu[6]=(3 \eta _{21}- \eta _{03})( \eta _{21}+ \eta _{03})[3( \eta _{30}+ \eta _{12})^{2}-( \eta _{21}+ \eta _{03})^{2}]-( \eta _{30}-3 \eta _{12})( \eta _{21}+ \eta _{03})[3( \eta _{30}+ \eta _{12})^{2}-( \eta _{21}+ \eta _{03})^{2}] \\ \end{array}\f] + +where \f$\eta_{ji}\f$ stands for \f$\texttt{Moments::nu}_{ji}\f$ . + +These values are proved to be invariants to the image scale, rotation, and reflection except the +seventh one, whose sign is changed by reflection. This invariance is proved with the assumption of +infinite image resolution. In case of raster images, the computed Hu invariants for the original and +transformed images are a bit different. + +@param moments Input moments computed with moments . +@param hu Output Hu invariants. + +@sa matchShapes + */ +CV_EXPORTS void HuMoments( const Moments& moments, double hu[7] ); + +/** @overload */ +CV_EXPORTS_W void HuMoments( const Moments& m, OutputArray hu ); + +//! @} imgproc_shape + +//! @addtogroup imgproc_object +//! @{ + +//! type of the template matching operation +enum TemplateMatchModes { + TM_SQDIFF = 0, /*!< \f[R(x,y)= \sum _{x',y'} (T(x',y')-I(x+x',y+y'))^2\f] + with mask: + \f[R(x,y)= \sum _{x',y'} \left( (T(x',y')-I(x+x',y+y')) \cdot + M(x',y') \right)^2\f] */ + TM_SQDIFF_NORMED = 1, /*!< \f[R(x,y)= \frac{\sum_{x',y'} (T(x',y')-I(x+x',y+y'))^2}{\sqrt{\sum_{ + x',y'}T(x',y')^2 \cdot \sum_{x',y'} I(x+x',y+y')^2}}\f] + with mask: + \f[R(x,y)= \frac{\sum _{x',y'} \left( (T(x',y')-I(x+x',y+y')) \cdot + M(x',y') \right)^2}{\sqrt{\sum_{x',y'} \left( T(x',y') \cdot + M(x',y') \right)^2 \cdot \sum_{x',y'} \left( I(x+x',y+y') \cdot + M(x',y') \right)^2}}\f] */ + TM_CCORR = 2, /*!< \f[R(x,y)= \sum _{x',y'} (T(x',y') \cdot I(x+x',y+y'))\f] + with mask: + \f[R(x,y)= \sum _{x',y'} (T(x',y') \cdot I(x+x',y+y') \cdot M(x',y') + ^2)\f] */ + TM_CCORR_NORMED = 3, /*!< \f[R(x,y)= \frac{\sum_{x',y'} (T(x',y') \cdot I(x+x',y+y'))}{\sqrt{ + \sum_{x',y'}T(x',y')^2 \cdot \sum_{x',y'} I(x+x',y+y')^2}}\f] + with mask: + \f[R(x,y)= \frac{\sum_{x',y'} (T(x',y') \cdot I(x+x',y+y') \cdot + M(x',y')^2)}{\sqrt{\sum_{x',y'} \left( T(x',y') \cdot M(x',y') + \right)^2 \cdot \sum_{x',y'} \left( I(x+x',y+y') \cdot M(x',y') + \right)^2}}\f] */ + TM_CCOEFF = 4, /*!< \f[R(x,y)= \sum _{x',y'} (T'(x',y') \cdot I'(x+x',y+y'))\f] + where + \f[\begin{array}{l} T'(x',y')=T(x',y') - 1/(w \cdot h) \cdot \sum _{ + x'',y''} T(x'',y'') \\ I'(x+x',y+y')=I(x+x',y+y') - 1/(w \cdot h) + \cdot \sum _{x'',y''} I(x+x'',y+y'') \end{array}\f] + with mask: + \f[\begin{array}{l} T'(x',y')=M(x',y') \cdot \left( T(x',y') - + \frac{1}{\sum _{x'',y''} M(x'',y'')} \cdot \sum _{x'',y''} + (T(x'',y'') \cdot M(x'',y'')) \right) \\ I'(x+x',y+y')=M(x',y') + \cdot \left( I(x+x',y+y') - \frac{1}{\sum _{x'',y''} M(x'',y'')} + \cdot \sum _{x'',y''} (I(x+x'',y+y'') \cdot M(x'',y'')) \right) + \end{array} \f] */ + TM_CCOEFF_NORMED = 5 /*!< \f[R(x,y)= \frac{ \sum_{x',y'} (T'(x',y') \cdot I'(x+x',y+y')) }{ + \sqrt{\sum_{x',y'}T'(x',y')^2 \cdot \sum_{x',y'} I'(x+x',y+y')^2} + }\f] */ +}; + +/** @example samples/cpp/tutorial_code/Histograms_Matching/MatchTemplate_Demo.cpp +An example using Template Matching algorithm +*/ + +/** @brief Compares a template against overlapped image regions. + +The function slides through image , compares the overlapped patches of size \f$w \times h\f$ against +templ using the specified method and stores the comparison results in result . #TemplateMatchModes +describes the formulae for the available comparison methods ( \f$I\f$ denotes image, \f$T\f$ +template, \f$R\f$ result, \f$M\f$ the optional mask ). The summation is done over template and/or +the image patch: \f$x' = 0...w-1, y' = 0...h-1\f$ + +After the function finishes the comparison, the best matches can be found as global minimums (when +#TM_SQDIFF was used) or maximums (when #TM_CCORR or #TM_CCOEFF was used) using the +#minMaxLoc function. In case of a color image, template summation in the numerator and each sum in +the denominator is done over all of the channels and separate mean values are used for each channel. +That is, the function can take a color template and a color image. The result will still be a +single-channel image, which is easier to analyze. + +@param image Image where the search is running. It must be 8-bit or 32-bit floating-point. +@param templ Searched template. It must be not greater than the source image and have the same +data type. +@param result Map of comparison results. It must be single-channel 32-bit floating-point. If image +is \f$W \times H\f$ and templ is \f$w \times h\f$ , then result is \f$(W-w+1) \times (H-h+1)\f$ . +@param method Parameter specifying the comparison method, see #TemplateMatchModes +@param mask Optional mask. It must have the same size as templ. It must either have the same number + of channels as template or only one channel, which is then used for all template and + image channels. If the data type is #CV_8U, the mask is interpreted as a binary mask, + meaning only elements where mask is nonzero are used and are kept unchanged independent + of the actual mask value (weight equals 1). For data tpye #CV_32F, the mask values are + used as weights. The exact formulas are documented in #TemplateMatchModes. + */ +CV_EXPORTS_W void matchTemplate( InputArray image, InputArray templ, + OutputArray result, int method, InputArray mask = noArray() ); + +//! @} + +//! @addtogroup imgproc_shape +//! @{ + +/** @example samples/cpp/connected_components.cpp +This program demonstrates connected components and use of the trackbar +*/ + +/** @brief computes the connected components labeled image of boolean image + +image with 4 or 8 way connectivity - returns N, the total number of labels [0, N-1] where 0 +represents the background label. ltype specifies the output label image type, an important +consideration based on the total number of labels or alternatively the total number of pixels in +the source image. ccltype specifies the connected components labeling algorithm to use, currently +Grana (BBDT) and Wu's (SAUF) @cite Wu2009 algorithms are supported, see the #ConnectedComponentsAlgorithmsTypes +for details. Note that SAUF algorithm forces a row major ordering of labels while BBDT does not. +This function uses parallel version of both Grana and Wu's algorithms if at least one allowed +parallel framework is enabled and if the rows of the image are at least twice the number returned by #getNumberOfCPUs. + +@param image the 8-bit single-channel image to be labeled +@param labels destination labeled image +@param connectivity 8 or 4 for 8-way or 4-way connectivity respectively +@param ltype output image label type. Currently CV_32S and CV_16U are supported. +@param ccltype connected components algorithm type (see the #ConnectedComponentsAlgorithmsTypes). +*/ +CV_EXPORTS_AS(connectedComponentsWithAlgorithm) int connectedComponents(InputArray image, OutputArray labels, + int connectivity, int ltype, int ccltype); + + +/** @overload + +@param image the 8-bit single-channel image to be labeled +@param labels destination labeled image +@param connectivity 8 or 4 for 8-way or 4-way connectivity respectively +@param ltype output image label type. Currently CV_32S and CV_16U are supported. +*/ +CV_EXPORTS_W int connectedComponents(InputArray image, OutputArray labels, + int connectivity = 8, int ltype = CV_32S); + + +/** @brief computes the connected components labeled image of boolean image and also produces a statistics output for each label + +image with 4 or 8 way connectivity - returns N, the total number of labels [0, N-1] where 0 +represents the background label. ltype specifies the output label image type, an important +consideration based on the total number of labels or alternatively the total number of pixels in +the source image. ccltype specifies the connected components labeling algorithm to use, currently +Grana's (BBDT) and Wu's (SAUF) @cite Wu2009 algorithms are supported, see the #ConnectedComponentsAlgorithmsTypes +for details. Note that SAUF algorithm forces a row major ordering of labels while BBDT does not. +This function uses parallel version of both Grana and Wu's algorithms (statistics included) if at least one allowed +parallel framework is enabled and if the rows of the image are at least twice the number returned by #getNumberOfCPUs. + +@param image the 8-bit single-channel image to be labeled +@param labels destination labeled image +@param stats statistics output for each label, including the background label. +Statistics are accessed via stats(label, COLUMN) where COLUMN is one of +#ConnectedComponentsTypes, selecting the statistic. The data type is CV_32S. +@param centroids centroid output for each label, including the background label. Centroids are +accessed via centroids(label, 0) for x and centroids(label, 1) for y. The data type CV_64F. +@param connectivity 8 or 4 for 8-way or 4-way connectivity respectively +@param ltype output image label type. Currently CV_32S and CV_16U are supported. +@param ccltype connected components algorithm type (see #ConnectedComponentsAlgorithmsTypes). +*/ +CV_EXPORTS_AS(connectedComponentsWithStatsWithAlgorithm) int connectedComponentsWithStats(InputArray image, OutputArray labels, + OutputArray stats, OutputArray centroids, + int connectivity, int ltype, int ccltype); + +/** @overload +@param image the 8-bit single-channel image to be labeled +@param labels destination labeled image +@param stats statistics output for each label, including the background label. +Statistics are accessed via stats(label, COLUMN) where COLUMN is one of +#ConnectedComponentsTypes, selecting the statistic. The data type is CV_32S. +@param centroids centroid output for each label, including the background label. Centroids are +accessed via centroids(label, 0) for x and centroids(label, 1) for y. The data type CV_64F. +@param connectivity 8 or 4 for 8-way or 4-way connectivity respectively +@param ltype output image label type. Currently CV_32S and CV_16U are supported. +*/ +CV_EXPORTS_W int connectedComponentsWithStats(InputArray image, OutputArray labels, + OutputArray stats, OutputArray centroids, + int connectivity = 8, int ltype = CV_32S); + + +/** @brief Finds contours in a binary image. + +The function retrieves contours from the binary image using the algorithm @cite Suzuki85 . The contours +are a useful tool for shape analysis and object detection and recognition. See squares.cpp in the +OpenCV sample directory. +@note Since opencv 3.2 source image is not modified by this function. + +@param image Source, an 8-bit single-channel image. Non-zero pixels are treated as 1's. Zero +pixels remain 0's, so the image is treated as binary . You can use #compare, #inRange, #threshold , +#adaptiveThreshold, #Canny, and others to create a binary image out of a grayscale or color one. +If mode equals to #RETR_CCOMP or #RETR_FLOODFILL, the input can also be a 32-bit integer image of labels (CV_32SC1). +@param contours Detected contours. Each contour is stored as a vector of points (e.g. +std::vector >). +@param hierarchy Optional output vector (e.g. std::vector), containing information about the image topology. It has +as many elements as the number of contours. For each i-th contour contours[i], the elements +hierarchy[i][0] , hierarchy[i][1] , hierarchy[i][2] , and hierarchy[i][3] are set to 0-based indices +in contours of the next and previous contours at the same hierarchical level, the first child +contour and the parent contour, respectively. If for the contour i there are no next, previous, +parent, or nested contours, the corresponding elements of hierarchy[i] will be negative. +@param mode Contour retrieval mode, see #RetrievalModes +@param method Contour approximation method, see #ContourApproximationModes +@param offset Optional offset by which every contour point is shifted. This is useful if the +contours are extracted from the image ROI and then they should be analyzed in the whole image +context. + */ +CV_EXPORTS_W void findContours( InputArray image, OutputArrayOfArrays contours, + OutputArray hierarchy, int mode, + int method, Point offset = Point()); + +/** @overload */ +CV_EXPORTS void findContours( InputArray image, OutputArrayOfArrays contours, + int mode, int method, Point offset = Point()); + +/** @example samples/cpp/squares.cpp +A program using pyramid scaling, Canny, contours and contour simplification to find +squares in a list of images (pic1-6.png). Returns sequence of squares detected on the image. +*/ + +/** @example samples/tapi/squares.cpp +A program using pyramid scaling, Canny, contours and contour simplification to find +squares in the input image. +*/ + +/** @brief Approximates a polygonal curve(s) with the specified precision. + +The function cv::approxPolyDP approximates a curve or a polygon with another curve/polygon with less +vertices so that the distance between them is less or equal to the specified precision. It uses the +Douglas-Peucker algorithm + +@param curve Input vector of a 2D point stored in std::vector or Mat +@param approxCurve Result of the approximation. The type should match the type of the input curve. +@param epsilon Parameter specifying the approximation accuracy. This is the maximum distance +between the original curve and its approximation. +@param closed If true, the approximated curve is closed (its first and last vertices are +connected). Otherwise, it is not closed. + */ +CV_EXPORTS_W void approxPolyDP( InputArray curve, + OutputArray approxCurve, + double epsilon, bool closed ); + +/** @brief Calculates a contour perimeter or a curve length. + +The function computes a curve length or a closed contour perimeter. + +@param curve Input vector of 2D points, stored in std::vector or Mat. +@param closed Flag indicating whether the curve is closed or not. + */ +CV_EXPORTS_W double arcLength( InputArray curve, bool closed ); + +/** @brief Calculates the up-right bounding rectangle of a point set or non-zero pixels of gray-scale image. + +The function calculates and returns the minimal up-right bounding rectangle for the specified point set or +non-zero pixels of gray-scale image. + +@param array Input gray-scale image or 2D point set, stored in std::vector or Mat. + */ +CV_EXPORTS_W Rect boundingRect( InputArray array ); + +/** @brief Calculates a contour area. + +The function computes a contour area. Similarly to moments , the area is computed using the Green +formula. Thus, the returned area and the number of non-zero pixels, if you draw the contour using +#drawContours or #fillPoly , can be different. Also, the function will most certainly give a wrong +results for contours with self-intersections. + +Example: +@code + vector contour; + contour.push_back(Point2f(0, 0)); + contour.push_back(Point2f(10, 0)); + contour.push_back(Point2f(10, 10)); + contour.push_back(Point2f(5, 4)); + + double area0 = contourArea(contour); + vector approx; + approxPolyDP(contour, approx, 5, true); + double area1 = contourArea(approx); + + cout << "area0 =" << area0 << endl << + "area1 =" << area1 << endl << + "approx poly vertices" << approx.size() << endl; +@endcode +@param contour Input vector of 2D points (contour vertices), stored in std::vector or Mat. +@param oriented Oriented area flag. If it is true, the function returns a signed area value, +depending on the contour orientation (clockwise or counter-clockwise). Using this feature you can +determine orientation of a contour by taking the sign of an area. By default, the parameter is +false, which means that the absolute value is returned. + */ +CV_EXPORTS_W double contourArea( InputArray contour, bool oriented = false ); + +/** @brief Finds a rotated rectangle of the minimum area enclosing the input 2D point set. + +The function calculates and returns the minimum-area bounding rectangle (possibly rotated) for a +specified point set. Developer should keep in mind that the returned RotatedRect can contain negative +indices when data is close to the containing Mat element boundary. + +@param points Input vector of 2D points, stored in std::vector\<\> or Mat + */ +CV_EXPORTS_W RotatedRect minAreaRect( InputArray points ); + +/** @brief Finds the four vertices of a rotated rect. Useful to draw the rotated rectangle. + +The function finds the four vertices of a rotated rectangle. This function is useful to draw the +rectangle. In C++, instead of using this function, you can directly use RotatedRect::points method. Please +visit the @ref tutorial_bounding_rotated_ellipses "tutorial on Creating Bounding rotated boxes and ellipses for contours" for more information. + +@param box The input rotated rectangle. It may be the output of +@param points The output array of four vertices of rectangles. + */ +CV_EXPORTS_W void boxPoints(RotatedRect box, OutputArray points); + +/** @brief Finds a circle of the minimum area enclosing a 2D point set. + +The function finds the minimal enclosing circle of a 2D point set using an iterative algorithm. + +@param points Input vector of 2D points, stored in std::vector\<\> or Mat +@param center Output center of the circle. +@param radius Output radius of the circle. + */ +CV_EXPORTS_W void minEnclosingCircle( InputArray points, + CV_OUT Point2f& center, CV_OUT float& radius ); + +/** @example samples/cpp/minarea.cpp +*/ + +/** @brief Finds a triangle of minimum area enclosing a 2D point set and returns its area. + +The function finds a triangle of minimum area enclosing the given set of 2D points and returns its +area. The output for a given 2D point set is shown in the image below. 2D points are depicted in +*red* and the enclosing triangle in *yellow*. + +![Sample output of the minimum enclosing triangle function](pics/minenclosingtriangle.png) + +The implementation of the algorithm is based on O'Rourke's @cite ORourke86 and Klee and Laskowski's +@cite KleeLaskowski85 papers. O'Rourke provides a \f$\theta(n)\f$ algorithm for finding the minimal +enclosing triangle of a 2D convex polygon with n vertices. Since the #minEnclosingTriangle function +takes a 2D point set as input an additional preprocessing step of computing the convex hull of the +2D point set is required. The complexity of the #convexHull function is \f$O(n log(n))\f$ which is higher +than \f$\theta(n)\f$. Thus the overall complexity of the function is \f$O(n log(n))\f$. + +@param points Input vector of 2D points with depth CV_32S or CV_32F, stored in std::vector\<\> or Mat +@param triangle Output vector of three 2D points defining the vertices of the triangle. The depth +of the OutputArray must be CV_32F. + */ +CV_EXPORTS_W double minEnclosingTriangle( InputArray points, CV_OUT OutputArray triangle ); + +/** @brief Compares two shapes. + +The function compares two shapes. All three implemented methods use the Hu invariants (see #HuMoments) + +@param contour1 First contour or grayscale image. +@param contour2 Second contour or grayscale image. +@param method Comparison method, see #ShapeMatchModes +@param parameter Method-specific parameter (not supported now). + */ +CV_EXPORTS_W double matchShapes( InputArray contour1, InputArray contour2, + int method, double parameter ); + +/** @example samples/cpp/convexhull.cpp +An example using the convexHull functionality +*/ + +/** @brief Finds the convex hull of a point set. + +The function cv::convexHull finds the convex hull of a 2D point set using the Sklansky's algorithm @cite Sklansky82 +that has *O(N logN)* complexity in the current implementation. + +@param points Input 2D point set, stored in std::vector or Mat. +@param hull Output convex hull. It is either an integer vector of indices or vector of points. In +the first case, the hull elements are 0-based indices of the convex hull points in the original +array (since the set of convex hull points is a subset of the original point set). In the second +case, hull elements are the convex hull points themselves. +@param clockwise Orientation flag. If it is true, the output convex hull is oriented clockwise. +Otherwise, it is oriented counter-clockwise. The assumed coordinate system has its X axis pointing +to the right, and its Y axis pointing upwards. +@param returnPoints Operation flag. In case of a matrix, when the flag is true, the function +returns convex hull points. Otherwise, it returns indices of the convex hull points. When the +output array is std::vector, the flag is ignored, and the output depends on the type of the +vector: std::vector\ implies returnPoints=false, std::vector\ implies +returnPoints=true. + +@note `points` and `hull` should be different arrays, inplace processing isn't supported. + +Check @ref tutorial_hull "the corresponding tutorial" for more details. + +useful links: + +https://www.learnopencv.com/convex-hull-using-opencv-in-python-and-c/ + */ +CV_EXPORTS_W void convexHull( InputArray points, OutputArray hull, + bool clockwise = false, bool returnPoints = true ); + +/** @brief Finds the convexity defects of a contour. + +The figure below displays convexity defects of a hand contour: + +![image](pics/defects.png) + +@param contour Input contour. +@param convexhull Convex hull obtained using convexHull that should contain indices of the contour +points that make the hull. +@param convexityDefects The output vector of convexity defects. In C++ and the new Python/Java +interface each convexity defect is represented as 4-element integer vector (a.k.a. #Vec4i): +(start_index, end_index, farthest_pt_index, fixpt_depth), where indices are 0-based indices +in the original contour of the convexity defect beginning, end and the farthest point, and +fixpt_depth is fixed-point approximation (with 8 fractional bits) of the distance between the +farthest contour point and the hull. That is, to get the floating-point value of the depth will be +fixpt_depth/256.0. + */ +CV_EXPORTS_W void convexityDefects( InputArray contour, InputArray convexhull, OutputArray convexityDefects ); + +/** @brief Tests a contour convexity. + +The function tests whether the input contour is convex or not. The contour must be simple, that is, +without self-intersections. Otherwise, the function output is undefined. + +@param contour Input vector of 2D points, stored in std::vector\<\> or Mat + */ +CV_EXPORTS_W bool isContourConvex( InputArray contour ); + +/** @example samples/cpp/intersectExample.cpp +Examples of how intersectConvexConvex works +*/ + +/** @brief Finds intersection of two convex polygons + +@param _p1 First polygon +@param _p2 Second polygon +@param _p12 Output polygon describing the intersecting area +@param handleNested When true, an intersection is found if one of the polygons is fully enclosed in the other. +When false, no intersection is found. If the polygons share a side or the vertex of one polygon lies on an edge +of the other, they are not considered nested and an intersection will be found regardless of the value of handleNested. + +@returns Absolute value of area of intersecting polygon + +@note intersectConvexConvex doesn't confirm that both polygons are convex and will return invalid results if they aren't. + */ +CV_EXPORTS_W float intersectConvexConvex( InputArray _p1, InputArray _p2, + OutputArray _p12, bool handleNested = true ); + +/** @example samples/cpp/fitellipse.cpp +An example using the fitEllipse technique +*/ + +/** @brief Fits an ellipse around a set of 2D points. + +The function calculates the ellipse that fits (in a least-squares sense) a set of 2D points best of +all. It returns the rotated rectangle in which the ellipse is inscribed. The first algorithm described by @cite Fitzgibbon95 +is used. Developer should keep in mind that it is possible that the returned +ellipse/rotatedRect data contains negative indices, due to the data points being close to the +border of the containing Mat element. + +@param points Input 2D point set, stored in std::vector\<\> or Mat + */ +CV_EXPORTS_W RotatedRect fitEllipse( InputArray points ); + +/** @brief Fits an ellipse around a set of 2D points. + + The function calculates the ellipse that fits a set of 2D points. + It returns the rotated rectangle in which the ellipse is inscribed. + The Approximate Mean Square (AMS) proposed by @cite Taubin1991 is used. + + For an ellipse, this basis set is \f$ \chi= \left(x^2, x y, y^2, x, y, 1\right) \f$, + which is a set of six free coefficients \f$ A^T=\left\{A_{\text{xx}},A_{\text{xy}},A_{\text{yy}},A_x,A_y,A_0\right\} \f$. + However, to specify an ellipse, all that is needed is five numbers; the major and minor axes lengths \f$ (a,b) \f$, + the position \f$ (x_0,y_0) \f$, and the orientation \f$ \theta \f$. This is because the basis set includes lines, + quadratics, parabolic and hyperbolic functions as well as elliptical functions as possible fits. + If the fit is found to be a parabolic or hyperbolic function then the standard #fitEllipse method is used. + The AMS method restricts the fit to parabolic, hyperbolic and elliptical curves + by imposing the condition that \f$ A^T ( D_x^T D_x + D_y^T D_y) A = 1 \f$ where + the matrices \f$ Dx \f$ and \f$ Dy \f$ are the partial derivatives of the design matrix \f$ D \f$ with + respect to x and y. The matrices are formed row by row applying the following to + each of the points in the set: + \f{align*}{ + D(i,:)&=\left\{x_i^2, x_i y_i, y_i^2, x_i, y_i, 1\right\} & + D_x(i,:)&=\left\{2 x_i,y_i,0,1,0,0\right\} & + D_y(i,:)&=\left\{0,x_i,2 y_i,0,1,0\right\} + \f} + The AMS method minimizes the cost function + \f{equation*}{ + \epsilon ^2=\frac{ A^T D^T D A }{ A^T (D_x^T D_x + D_y^T D_y) A^T } + \f} + + The minimum cost is found by solving the generalized eigenvalue problem. + + \f{equation*}{ + D^T D A = \lambda \left( D_x^T D_x + D_y^T D_y\right) A + \f} + + @param points Input 2D point set, stored in std::vector\<\> or Mat + */ +CV_EXPORTS_W RotatedRect fitEllipseAMS( InputArray points ); + + +/** @brief Fits an ellipse around a set of 2D points. + + The function calculates the ellipse that fits a set of 2D points. + It returns the rotated rectangle in which the ellipse is inscribed. + The Direct least square (Direct) method by @cite Fitzgibbon1999 is used. + + For an ellipse, this basis set is \f$ \chi= \left(x^2, x y, y^2, x, y, 1\right) \f$, + which is a set of six free coefficients \f$ A^T=\left\{A_{\text{xx}},A_{\text{xy}},A_{\text{yy}},A_x,A_y,A_0\right\} \f$. + However, to specify an ellipse, all that is needed is five numbers; the major and minor axes lengths \f$ (a,b) \f$, + the position \f$ (x_0,y_0) \f$, and the orientation \f$ \theta \f$. This is because the basis set includes lines, + quadratics, parabolic and hyperbolic functions as well as elliptical functions as possible fits. + The Direct method confines the fit to ellipses by ensuring that \f$ 4 A_{xx} A_{yy}- A_{xy}^2 > 0 \f$. + The condition imposed is that \f$ 4 A_{xx} A_{yy}- A_{xy}^2=1 \f$ which satisfies the inequality + and as the coefficients can be arbitrarily scaled is not overly restrictive. + + \f{equation*}{ + \epsilon ^2= A^T D^T D A \quad \text{with} \quad A^T C A =1 \quad \text{and} \quad C=\left(\begin{matrix} + 0 & 0 & 2 & 0 & 0 & 0 \\ + 0 & -1 & 0 & 0 & 0 & 0 \\ + 2 & 0 & 0 & 0 & 0 & 0 \\ + 0 & 0 & 0 & 0 & 0 & 0 \\ + 0 & 0 & 0 & 0 & 0 & 0 \\ + 0 & 0 & 0 & 0 & 0 & 0 + \end{matrix} \right) + \f} + + The minimum cost is found by solving the generalized eigenvalue problem. + + \f{equation*}{ + D^T D A = \lambda \left( C\right) A + \f} + + The system produces only one positive eigenvalue \f$ \lambda\f$ which is chosen as the solution + with its eigenvector \f$\mathbf{u}\f$. These are used to find the coefficients + + \f{equation*}{ + A = \sqrt{\frac{1}{\mathbf{u}^T C \mathbf{u}}} \mathbf{u} + \f} + The scaling factor guarantees that \f$A^T C A =1\f$. + + @param points Input 2D point set, stored in std::vector\<\> or Mat + */ +CV_EXPORTS_W RotatedRect fitEllipseDirect( InputArray points ); + +/** @brief Fits a line to a 2D or 3D point set. + +The function fitLine fits a line to a 2D or 3D point set by minimizing \f$\sum_i \rho(r_i)\f$ where +\f$r_i\f$ is a distance between the \f$i^{th}\f$ point, the line and \f$\rho(r)\f$ is a distance function, one +of the following: +- DIST_L2 +\f[\rho (r) = r^2/2 \quad \text{(the simplest and the fastest least-squares method)}\f] +- DIST_L1 +\f[\rho (r) = r\f] +- DIST_L12 +\f[\rho (r) = 2 \cdot ( \sqrt{1 + \frac{r^2}{2}} - 1)\f] +- DIST_FAIR +\f[\rho \left (r \right ) = C^2 \cdot \left ( \frac{r}{C} - \log{\left(1 + \frac{r}{C}\right)} \right ) \quad \text{where} \quad C=1.3998\f] +- DIST_WELSCH +\f[\rho \left (r \right ) = \frac{C^2}{2} \cdot \left ( 1 - \exp{\left(-\left(\frac{r}{C}\right)^2\right)} \right ) \quad \text{where} \quad C=2.9846\f] +- DIST_HUBER +\f[\rho (r) = \fork{r^2/2}{if \(r < C\)}{C \cdot (r-C/2)}{otherwise} \quad \text{where} \quad C=1.345\f] + +The algorithm is based on the M-estimator ( ) technique +that iteratively fits the line using the weighted least-squares algorithm. After each iteration the +weights \f$w_i\f$ are adjusted to be inversely proportional to \f$\rho(r_i)\f$ . + +@param points Input vector of 2D or 3D points, stored in std::vector\<\> or Mat. +@param line Output line parameters. In case of 2D fitting, it should be a vector of 4 elements +(like Vec4f) - (vx, vy, x0, y0), where (vx, vy) is a normalized vector collinear to the line and +(x0, y0) is a point on the line. In case of 3D fitting, it should be a vector of 6 elements (like +Vec6f) - (vx, vy, vz, x0, y0, z0), where (vx, vy, vz) is a normalized vector collinear to the line +and (x0, y0, z0) is a point on the line. +@param distType Distance used by the M-estimator, see #DistanceTypes +@param param Numerical parameter ( C ) for some types of distances. If it is 0, an optimal value +is chosen. +@param reps Sufficient accuracy for the radius (distance between the coordinate origin and the line). +@param aeps Sufficient accuracy for the angle. 0.01 would be a good default value for reps and aeps. + */ +CV_EXPORTS_W void fitLine( InputArray points, OutputArray line, int distType, + double param, double reps, double aeps ); + +/** @brief Performs a point-in-contour test. + +The function determines whether the point is inside a contour, outside, or lies on an edge (or +coincides with a vertex). It returns positive (inside), negative (outside), or zero (on an edge) +value, correspondingly. When measureDist=false , the return value is +1, -1, and 0, respectively. +Otherwise, the return value is a signed distance between the point and the nearest contour edge. + +See below a sample output of the function where each image pixel is tested against the contour: + +![sample output](pics/pointpolygon.png) + +@param contour Input contour. +@param pt Point tested against the contour. +@param measureDist If true, the function estimates the signed distance from the point to the +nearest contour edge. Otherwise, the function only checks if the point is inside a contour or not. + */ +CV_EXPORTS_W double pointPolygonTest( InputArray contour, Point2f pt, bool measureDist ); + +/** @brief Finds out if there is any intersection between two rotated rectangles. + +If there is then the vertices of the intersecting region are returned as well. + +Below are some examples of intersection configurations. The hatched pattern indicates the +intersecting region and the red vertices are returned by the function. + +![intersection examples](pics/intersection.png) + +@param rect1 First rectangle +@param rect2 Second rectangle +@param intersectingRegion The output array of the vertices of the intersecting region. It returns +at most 8 vertices. Stored as std::vector\ or cv::Mat as Mx1 of type CV_32FC2. +@returns One of #RectanglesIntersectTypes + */ +CV_EXPORTS_W int rotatedRectangleIntersection( const RotatedRect& rect1, const RotatedRect& rect2, OutputArray intersectingRegion ); + +/** @brief Creates a smart pointer to a cv::GeneralizedHoughBallard class and initializes it. +*/ +CV_EXPORTS_W Ptr createGeneralizedHoughBallard(); + +/** @brief Creates a smart pointer to a cv::GeneralizedHoughGuil class and initializes it. +*/ +CV_EXPORTS_W Ptr createGeneralizedHoughGuil(); + +//! @} imgproc_shape + +//! @addtogroup imgproc_colormap +//! @{ + +//! GNU Octave/MATLAB equivalent colormaps +enum ColormapTypes +{ + COLORMAP_AUTUMN = 0, //!< ![autumn](pics/colormaps/colorscale_autumn.jpg) + COLORMAP_BONE = 1, //!< ![bone](pics/colormaps/colorscale_bone.jpg) + COLORMAP_JET = 2, //!< ![jet](pics/colormaps/colorscale_jet.jpg) + COLORMAP_WINTER = 3, //!< ![winter](pics/colormaps/colorscale_winter.jpg) + COLORMAP_RAINBOW = 4, //!< ![rainbow](pics/colormaps/colorscale_rainbow.jpg) + COLORMAP_OCEAN = 5, //!< ![ocean](pics/colormaps/colorscale_ocean.jpg) + COLORMAP_SUMMER = 6, //!< ![summer](pics/colormaps/colorscale_summer.jpg) + COLORMAP_SPRING = 7, //!< ![spring](pics/colormaps/colorscale_spring.jpg) + COLORMAP_COOL = 8, //!< ![cool](pics/colormaps/colorscale_cool.jpg) + COLORMAP_HSV = 9, //!< ![HSV](pics/colormaps/colorscale_hsv.jpg) + COLORMAP_PINK = 10, //!< ![pink](pics/colormaps/colorscale_pink.jpg) + COLORMAP_HOT = 11, //!< ![hot](pics/colormaps/colorscale_hot.jpg) + COLORMAP_PARULA = 12, //!< ![parula](pics/colormaps/colorscale_parula.jpg) + COLORMAP_MAGMA = 13, //!< ![magma](pics/colormaps/colorscale_magma.jpg) + COLORMAP_INFERNO = 14, //!< ![inferno](pics/colormaps/colorscale_inferno.jpg) + COLORMAP_PLASMA = 15, //!< ![plasma](pics/colormaps/colorscale_plasma.jpg) + COLORMAP_VIRIDIS = 16, //!< ![viridis](pics/colormaps/colorscale_viridis.jpg) + COLORMAP_CIVIDIS = 17, //!< ![cividis](pics/colormaps/colorscale_cividis.jpg) + COLORMAP_TWILIGHT = 18, //!< ![twilight](pics/colormaps/colorscale_twilight.jpg) + COLORMAP_TWILIGHT_SHIFTED = 19, //!< ![twilight shifted](pics/colormaps/colorscale_twilight_shifted.jpg) + COLORMAP_TURBO = 20, //!< ![turbo](pics/colormaps/colorscale_turbo.jpg) + COLORMAP_DEEPGREEN = 21 //!< ![deepgreen](pics/colormaps/colorscale_deepgreen.jpg) +}; + +/** @example samples/cpp/falsecolor.cpp +An example using applyColorMap function +*/ + +/** @brief Applies a GNU Octave/MATLAB equivalent colormap on a given image. + +@param src The source image, grayscale or colored of type CV_8UC1 or CV_8UC3. +@param dst The result is the colormapped source image. Note: Mat::create is called on dst. +@param colormap The colormap to apply, see #ColormapTypes +*/ +CV_EXPORTS_W void applyColorMap(InputArray src, OutputArray dst, int colormap); + +/** @brief Applies a user colormap on a given image. + +@param src The source image, grayscale or colored of type CV_8UC1 or CV_8UC3. +@param dst The result is the colormapped source image. Note: Mat::create is called on dst. +@param userColor The colormap to apply of type CV_8UC1 or CV_8UC3 and size 256 +*/ +CV_EXPORTS_W void applyColorMap(InputArray src, OutputArray dst, InputArray userColor); + +//! @} imgproc_colormap + +//! @addtogroup imgproc_draw +//! @{ + + +/** OpenCV color channel order is BGR[A] */ +#define CV_RGB(r, g, b) cv::Scalar((b), (g), (r), 0) + +/** @brief Draws a line segment connecting two points. + +The function line draws the line segment between pt1 and pt2 points in the image. The line is +clipped by the image boundaries. For non-antialiased lines with integer coordinates, the 8-connected +or 4-connected Bresenham algorithm is used. Thick lines are drawn with rounding endings. Antialiased +lines are drawn using Gaussian filtering. + +@param img Image. +@param pt1 First point of the line segment. +@param pt2 Second point of the line segment. +@param color Line color. +@param thickness Line thickness. +@param lineType Type of the line. See #LineTypes. +@param shift Number of fractional bits in the point coordinates. + */ +CV_EXPORTS_W void line(InputOutputArray img, Point pt1, Point pt2, const Scalar& color, + int thickness = 1, int lineType = LINE_8, int shift = 0); + +/** @brief Draws a arrow segment pointing from the first point to the second one. + +The function cv::arrowedLine draws an arrow between pt1 and pt2 points in the image. See also #line. + +@param img Image. +@param pt1 The point the arrow starts from. +@param pt2 The point the arrow points to. +@param color Line color. +@param thickness Line thickness. +@param line_type Type of the line. See #LineTypes +@param shift Number of fractional bits in the point coordinates. +@param tipLength The length of the arrow tip in relation to the arrow length + */ +CV_EXPORTS_W void arrowedLine(InputOutputArray img, Point pt1, Point pt2, const Scalar& color, + int thickness=1, int line_type=8, int shift=0, double tipLength=0.1); + +/** @brief Draws a simple, thick, or filled up-right rectangle. + +The function cv::rectangle draws a rectangle outline or a filled rectangle whose two opposite corners +are pt1 and pt2. + +@param img Image. +@param pt1 Vertex of the rectangle. +@param pt2 Vertex of the rectangle opposite to pt1 . +@param color Rectangle color or brightness (grayscale image). +@param thickness Thickness of lines that make up the rectangle. Negative values, like #FILLED, +mean that the function has to draw a filled rectangle. +@param lineType Type of the line. See #LineTypes +@param shift Number of fractional bits in the point coordinates. + */ +CV_EXPORTS_W void rectangle(InputOutputArray img, Point pt1, Point pt2, + const Scalar& color, int thickness = 1, + int lineType = LINE_8, int shift = 0); + +/** @overload + +use `rec` parameter as alternative specification of the drawn rectangle: `r.tl() and +r.br()-Point(1,1)` are opposite corners +*/ +CV_EXPORTS_W void rectangle(InputOutputArray img, Rect rec, + const Scalar& color, int thickness = 1, + int lineType = LINE_8, int shift = 0); + +/** @example samples/cpp/tutorial_code/ImgProc/basic_drawing/Drawing_2.cpp +An example using drawing functions +*/ + +/** @brief Draws a circle. + +The function cv::circle draws a simple or filled circle with a given center and radius. +@param img Image where the circle is drawn. +@param center Center of the circle. +@param radius Radius of the circle. +@param color Circle color. +@param thickness Thickness of the circle outline, if positive. Negative values, like #FILLED, +mean that a filled circle is to be drawn. +@param lineType Type of the circle boundary. See #LineTypes +@param shift Number of fractional bits in the coordinates of the center and in the radius value. + */ +CV_EXPORTS_W void circle(InputOutputArray img, Point center, int radius, + const Scalar& color, int thickness = 1, + int lineType = LINE_8, int shift = 0); + +/** @brief Draws a simple or thick elliptic arc or fills an ellipse sector. + +The function cv::ellipse with more parameters draws an ellipse outline, a filled ellipse, an elliptic +arc, or a filled ellipse sector. The drawing code uses general parametric form. +A piecewise-linear curve is used to approximate the elliptic arc +boundary. If you need more control of the ellipse rendering, you can retrieve the curve using +#ellipse2Poly and then render it with #polylines or fill it with #fillPoly. If you use the first +variant of the function and want to draw the whole ellipse, not an arc, pass `startAngle=0` and +`endAngle=360`. If `startAngle` is greater than `endAngle`, they are swapped. The figure below explains +the meaning of the parameters to draw the blue arc. + +![Parameters of Elliptic Arc](pics/ellipse.svg) + +@param img Image. +@param center Center of the ellipse. +@param axes Half of the size of the ellipse main axes. +@param angle Ellipse rotation angle in degrees. +@param startAngle Starting angle of the elliptic arc in degrees. +@param endAngle Ending angle of the elliptic arc in degrees. +@param color Ellipse color. +@param thickness Thickness of the ellipse arc outline, if positive. Otherwise, this indicates that +a filled ellipse sector is to be drawn. +@param lineType Type of the ellipse boundary. See #LineTypes +@param shift Number of fractional bits in the coordinates of the center and values of axes. + */ +CV_EXPORTS_W void ellipse(InputOutputArray img, Point center, Size axes, + double angle, double startAngle, double endAngle, + const Scalar& color, int thickness = 1, + int lineType = LINE_8, int shift = 0); + +/** @overload +@param img Image. +@param box Alternative ellipse representation via RotatedRect. This means that the function draws +an ellipse inscribed in the rotated rectangle. +@param color Ellipse color. +@param thickness Thickness of the ellipse arc outline, if positive. Otherwise, this indicates that +a filled ellipse sector is to be drawn. +@param lineType Type of the ellipse boundary. See #LineTypes +*/ +CV_EXPORTS_W void ellipse(InputOutputArray img, const RotatedRect& box, const Scalar& color, + int thickness = 1, int lineType = LINE_8); + +/* ----------------------------------------------------------------------------------------- */ +/* ADDING A SET OF PREDEFINED MARKERS WHICH COULD BE USED TO HIGHLIGHT POSITIONS IN AN IMAGE */ +/* ----------------------------------------------------------------------------------------- */ + +/** @brief Draws a marker on a predefined position in an image. + +The function cv::drawMarker draws a marker on a given position in the image. For the moment several +marker types are supported, see #MarkerTypes for more information. + +@param img Image. +@param position The point where the crosshair is positioned. +@param color Line color. +@param markerType The specific type of marker you want to use, see #MarkerTypes +@param thickness Line thickness. +@param line_type Type of the line, See #LineTypes +@param markerSize The length of the marker axis [default = 20 pixels] + */ +CV_EXPORTS_W void drawMarker(InputOutputArray img, Point position, const Scalar& color, + int markerType = MARKER_CROSS, int markerSize=20, int thickness=1, + int line_type=8); + +/* ----------------------------------------------------------------------------------------- */ +/* END OF MARKER SECTION */ +/* ----------------------------------------------------------------------------------------- */ + +/** @overload */ +CV_EXPORTS void fillConvexPoly(InputOutputArray img, const Point* pts, int npts, + const Scalar& color, int lineType = LINE_8, + int shift = 0); + +/** @brief Fills a convex polygon. + +The function cv::fillConvexPoly draws a filled convex polygon. This function is much faster than the +function #fillPoly . It can fill not only convex polygons but any monotonic polygon without +self-intersections, that is, a polygon whose contour intersects every horizontal line (scan line) +twice at the most (though, its top-most and/or the bottom edge could be horizontal). + +@param img Image. +@param points Polygon vertices. +@param color Polygon color. +@param lineType Type of the polygon boundaries. See #LineTypes +@param shift Number of fractional bits in the vertex coordinates. + */ +CV_EXPORTS_W void fillConvexPoly(InputOutputArray img, InputArray points, + const Scalar& color, int lineType = LINE_8, + int shift = 0); + +/** @overload */ +CV_EXPORTS void fillPoly(InputOutputArray img, const Point** pts, + const int* npts, int ncontours, + const Scalar& color, int lineType = LINE_8, int shift = 0, + Point offset = Point() ); + +/** @example samples/cpp/tutorial_code/ImgProc/basic_drawing/Drawing_1.cpp +An example using drawing functions +Check @ref tutorial_random_generator_and_text "the corresponding tutorial" for more details +*/ + +/** @brief Fills the area bounded by one or more polygons. + +The function cv::fillPoly fills an area bounded by several polygonal contours. The function can fill +complex areas, for example, areas with holes, contours with self-intersections (some of their +parts), and so forth. + +@param img Image. +@param pts Array of polygons where each polygon is represented as an array of points. +@param color Polygon color. +@param lineType Type of the polygon boundaries. See #LineTypes +@param shift Number of fractional bits in the vertex coordinates. +@param offset Optional offset of all points of the contours. + */ +CV_EXPORTS_W void fillPoly(InputOutputArray img, InputArrayOfArrays pts, + const Scalar& color, int lineType = LINE_8, int shift = 0, + Point offset = Point() ); + +/** @overload */ +CV_EXPORTS void polylines(InputOutputArray img, const Point* const* pts, const int* npts, + int ncontours, bool isClosed, const Scalar& color, + int thickness = 1, int lineType = LINE_8, int shift = 0 ); + +/** @brief Draws several polygonal curves. + +@param img Image. +@param pts Array of polygonal curves. +@param isClosed Flag indicating whether the drawn polylines are closed or not. If they are closed, +the function draws a line from the last vertex of each curve to its first vertex. +@param color Polyline color. +@param thickness Thickness of the polyline edges. +@param lineType Type of the line segments. See #LineTypes +@param shift Number of fractional bits in the vertex coordinates. + +The function cv::polylines draws one or more polygonal curves. + */ +CV_EXPORTS_W void polylines(InputOutputArray img, InputArrayOfArrays pts, + bool isClosed, const Scalar& color, + int thickness = 1, int lineType = LINE_8, int shift = 0 ); + +/** @example samples/cpp/contours2.cpp +An example program illustrates the use of cv::findContours and cv::drawContours +\image html WindowsQtContoursOutput.png "Screenshot of the program" +*/ + +/** @example samples/cpp/segment_objects.cpp +An example using drawContours to clean up a background segmentation result +*/ + +/** @brief Draws contours outlines or filled contours. + +The function draws contour outlines in the image if \f$\texttt{thickness} \ge 0\f$ or fills the area +bounded by the contours if \f$\texttt{thickness}<0\f$ . The example below shows how to retrieve +connected components from the binary image and label them: : +@include snippets/imgproc_drawContours.cpp + +@param image Destination image. +@param contours All the input contours. Each contour is stored as a point vector. +@param contourIdx Parameter indicating a contour to draw. If it is negative, all the contours are drawn. +@param color Color of the contours. +@param thickness Thickness of lines the contours are drawn with. If it is negative (for example, +thickness=#FILLED ), the contour interiors are drawn. +@param lineType Line connectivity. See #LineTypes +@param hierarchy Optional information about hierarchy. It is only needed if you want to draw only +some of the contours (see maxLevel ). +@param maxLevel Maximal level for drawn contours. If it is 0, only the specified contour is drawn. +If it is 1, the function draws the contour(s) and all the nested contours. If it is 2, the function +draws the contours, all the nested contours, all the nested-to-nested contours, and so on. This +parameter is only taken into account when there is hierarchy available. +@param offset Optional contour shift parameter. Shift all the drawn contours by the specified +\f$\texttt{offset}=(dx,dy)\f$ . +@note When thickness=#FILLED, the function is designed to handle connected components with holes correctly +even when no hierarchy date is provided. This is done by analyzing all the outlines together +using even-odd rule. This may give incorrect results if you have a joint collection of separately retrieved +contours. In order to solve this problem, you need to call #drawContours separately for each sub-group +of contours, or iterate over the collection using contourIdx parameter. + */ +CV_EXPORTS_W void drawContours( InputOutputArray image, InputArrayOfArrays contours, + int contourIdx, const Scalar& color, + int thickness = 1, int lineType = LINE_8, + InputArray hierarchy = noArray(), + int maxLevel = INT_MAX, Point offset = Point() ); + +/** @brief Clips the line against the image rectangle. + +The function cv::clipLine calculates a part of the line segment that is entirely within the specified +rectangle. it returns false if the line segment is completely outside the rectangle. Otherwise, +it returns true . +@param imgSize Image size. The image rectangle is Rect(0, 0, imgSize.width, imgSize.height) . +@param pt1 First line point. +@param pt2 Second line point. + */ +CV_EXPORTS bool clipLine(Size imgSize, CV_IN_OUT Point& pt1, CV_IN_OUT Point& pt2); + +/** @overload +@param imgSize Image size. The image rectangle is Rect(0, 0, imgSize.width, imgSize.height) . +@param pt1 First line point. +@param pt2 Second line point. +*/ +CV_EXPORTS bool clipLine(Size2l imgSize, CV_IN_OUT Point2l& pt1, CV_IN_OUT Point2l& pt2); + +/** @overload +@param imgRect Image rectangle. +@param pt1 First line point. +@param pt2 Second line point. +*/ +CV_EXPORTS_W bool clipLine(Rect imgRect, CV_OUT CV_IN_OUT Point& pt1, CV_OUT CV_IN_OUT Point& pt2); + +/** @brief Approximates an elliptic arc with a polyline. + +The function ellipse2Poly computes the vertices of a polyline that approximates the specified +elliptic arc. It is used by #ellipse. If `arcStart` is greater than `arcEnd`, they are swapped. + +@param center Center of the arc. +@param axes Half of the size of the ellipse main axes. See #ellipse for details. +@param angle Rotation angle of the ellipse in degrees. See #ellipse for details. +@param arcStart Starting angle of the elliptic arc in degrees. +@param arcEnd Ending angle of the elliptic arc in degrees. +@param delta Angle between the subsequent polyline vertices. It defines the approximation +accuracy. +@param pts Output vector of polyline vertices. + */ +CV_EXPORTS_W void ellipse2Poly( Point center, Size axes, int angle, + int arcStart, int arcEnd, int delta, + CV_OUT std::vector& pts ); + +/** @overload +@param center Center of the arc. +@param axes Half of the size of the ellipse main axes. See #ellipse for details. +@param angle Rotation angle of the ellipse in degrees. See #ellipse for details. +@param arcStart Starting angle of the elliptic arc in degrees. +@param arcEnd Ending angle of the elliptic arc in degrees. +@param delta Angle between the subsequent polyline vertices. It defines the approximation accuracy. +@param pts Output vector of polyline vertices. +*/ +CV_EXPORTS void ellipse2Poly(Point2d center, Size2d axes, int angle, + int arcStart, int arcEnd, int delta, + CV_OUT std::vector& pts); + +/** @brief Draws a text string. + +The function cv::putText renders the specified text string in the image. Symbols that cannot be rendered +using the specified font are replaced by question marks. See #getTextSize for a text rendering code +example. + +@param img Image. +@param text Text string to be drawn. +@param org Bottom-left corner of the text string in the image. +@param fontFace Font type, see #HersheyFonts. +@param fontScale Font scale factor that is multiplied by the font-specific base size. +@param color Text color. +@param thickness Thickness of the lines used to draw a text. +@param lineType Line type. See #LineTypes +@param bottomLeftOrigin When true, the image data origin is at the bottom-left corner. Otherwise, +it is at the top-left corner. + */ +CV_EXPORTS_W void putText( InputOutputArray img, const String& text, Point org, + int fontFace, double fontScale, Scalar color, + int thickness = 1, int lineType = LINE_8, + bool bottomLeftOrigin = false ); + +/** @brief Calculates the width and height of a text string. + +The function cv::getTextSize calculates and returns the size of a box that contains the specified text. +That is, the following code renders some text, the tight box surrounding it, and the baseline: : +@code + String text = "Funny text inside the box"; + int fontFace = FONT_HERSHEY_SCRIPT_SIMPLEX; + double fontScale = 2; + int thickness = 3; + + Mat img(600, 800, CV_8UC3, Scalar::all(0)); + + int baseline=0; + Size textSize = getTextSize(text, fontFace, + fontScale, thickness, &baseline); + baseline += thickness; + + // center the text + Point textOrg((img.cols - textSize.width)/2, + (img.rows + textSize.height)/2); + + // draw the box + rectangle(img, textOrg + Point(0, baseline), + textOrg + Point(textSize.width, -textSize.height), + Scalar(0,0,255)); + // ... and the baseline first + line(img, textOrg + Point(0, thickness), + textOrg + Point(textSize.width, thickness), + Scalar(0, 0, 255)); + + // then put the text itself + putText(img, text, textOrg, fontFace, fontScale, + Scalar::all(255), thickness, 8); +@endcode + +@param text Input text string. +@param fontFace Font to use, see #HersheyFonts. +@param fontScale Font scale factor that is multiplied by the font-specific base size. +@param thickness Thickness of lines used to render the text. See #putText for details. +@param[out] baseLine y-coordinate of the baseline relative to the bottom-most text +point. +@return The size of a box that contains the specified text. + +@see putText + */ +CV_EXPORTS_W Size getTextSize(const String& text, int fontFace, + double fontScale, int thickness, + CV_OUT int* baseLine); + + +/** @brief Calculates the font-specific size to use to achieve a given height in pixels. + +@param fontFace Font to use, see cv::HersheyFonts. +@param pixelHeight Pixel height to compute the fontScale for +@param thickness Thickness of lines used to render the text.See putText for details. +@return The fontSize to use for cv::putText + +@see cv::putText +*/ +CV_EXPORTS_W double getFontScaleFromHeight(const int fontFace, + const int pixelHeight, + const int thickness = 1); + +/** @brief Line iterator + +The class is used to iterate over all the pixels on the raster line +segment connecting two specified points. + +The class LineIterator is used to get each pixel of a raster line. It +can be treated as versatile implementation of the Bresenham algorithm +where you can stop at each pixel and do some extra processing, for +example, grab pixel values along the line or draw a line with an effect +(for example, with XOR operation). + +The number of pixels along the line is stored in LineIterator::count. +The method LineIterator::pos returns the current position in the image: + +@code{.cpp} +// grabs pixels along the line (pt1, pt2) +// from 8-bit 3-channel image to the buffer +LineIterator it(img, pt1, pt2, 8); +LineIterator it2 = it; +vector buf(it.count); + +for(int i = 0; i < it.count; i++, ++it) + buf[i] = *(const Vec3b*)*it; + +// alternative way of iterating through the line +for(int i = 0; i < it2.count; i++, ++it2) +{ + Vec3b val = img.at(it2.pos()); + CV_Assert(buf[i] == val); +} +@endcode +*/ +class CV_EXPORTS LineIterator +{ +public: + /** @brief initializes the iterator + + creates iterators for the line connecting pt1 and pt2 + the line will be clipped on the image boundaries + the line is 8-connected or 4-connected + If leftToRight=true, then the iteration is always done + from the left-most point to the right most, + not to depend on the ordering of pt1 and pt2 parameters; + */ + LineIterator( const Mat& img, Point pt1, Point pt2, + int connectivity = 8, bool leftToRight = false ) + { + init(&img, Rect(0, 0, img.cols, img.rows), pt1, pt2, connectivity, leftToRight); + ptmode = false; + } + LineIterator( Point pt1, Point pt2, + int connectivity = 8, bool leftToRight = false ) + { + init(0, Rect(std::min(pt1.x, pt2.x), + std::min(pt1.y, pt2.y), + std::max(pt1.x, pt2.x) - std::min(pt1.x, pt2.x) + 1, + std::max(pt1.y, pt2.y) - std::min(pt1.y, pt2.y) + 1), + pt1, pt2, connectivity, leftToRight); + ptmode = true; + } + LineIterator( Size boundingAreaSize, Point pt1, Point pt2, + int connectivity = 8, bool leftToRight = false ) + { + init(0, Rect(0, 0, boundingAreaSize.width, boundingAreaSize.height), + pt1, pt2, connectivity, leftToRight); + ptmode = true; + } + LineIterator( Rect boundingAreaRect, Point pt1, Point pt2, + int connectivity = 8, bool leftToRight = false ) + { + init(0, boundingAreaRect, pt1, pt2, connectivity, leftToRight); + ptmode = true; + } + void init(const Mat* img, Rect boundingAreaRect, Point pt1, Point pt2, int connectivity, bool leftToRight); + + /** @brief returns pointer to the current pixel + */ + uchar* operator *(); + /** @brief prefix increment operator (++it). shifts iterator to the next pixel + */ + LineIterator& operator ++(); + /** @brief postfix increment operator (it++). shifts iterator to the next pixel + */ + LineIterator operator ++(int); + /** @brief returns coordinates of the current pixel + */ + Point pos() const; + + uchar* ptr; + const uchar* ptr0; + int step, elemSize; + int err, count; + int minusDelta, plusDelta; + int minusStep, plusStep; + int minusShift, plusShift; + Point p; + bool ptmode; +}; + +//! @cond IGNORED + +// === LineIterator implementation === + +inline +uchar* LineIterator::operator *() +{ + return ptmode ? 0 : ptr; +} + +inline +LineIterator& LineIterator::operator ++() +{ + int mask = err < 0 ? -1 : 0; + err += minusDelta + (plusDelta & mask); + if(!ptmode) + { + ptr += minusStep + (plusStep & mask); + } + else + { + p.x += minusShift + (plusShift & mask); + p.y += minusStep + (plusStep & mask); + } + return *this; +} + +inline +LineIterator LineIterator::operator ++(int) +{ + LineIterator it = *this; + ++(*this); + return it; +} + +inline +Point LineIterator::pos() const +{ + if(!ptmode) + { + size_t offset = (size_t)(ptr - ptr0); + int y = (int)(offset/step); + int x = (int)((offset - (size_t)y*step)/elemSize); + return Point(x, y); + } + return p; +} + +//! @endcond + +//! @} imgproc_draw + +//! @} imgproc + +} // cv + +#endif diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/imgproc/detail/gcgraph.hpp b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/imgproc/detail/gcgraph.hpp new file mode 100644 index 0000000..f17c6e7 --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/imgproc/detail/gcgraph.hpp @@ -0,0 +1,395 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// Intel License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000, Intel Corporation, all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of Intel Corporation may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef OPENCV_IMGPROC_DETAIL_GCGRAPH_HPP +#define OPENCV_IMGPROC_DETAIL_GCGRAPH_HPP + +//! @cond IGNORED + +namespace cv { namespace detail { +template class GCGraph +{ +public: + GCGraph(); + GCGraph( unsigned int vtxCount, unsigned int edgeCount ); + ~GCGraph(); + void create( unsigned int vtxCount, unsigned int edgeCount ); + int addVtx(); + void addEdges( int i, int j, TWeight w, TWeight revw ); + void addTermWeights( int i, TWeight sourceW, TWeight sinkW ); + TWeight maxFlow(); + bool inSourceSegment( int i ); +private: + class Vtx + { + public: + Vtx *next; // initialized and used in maxFlow() only + int parent; + int first; + int ts; + int dist; + TWeight weight; + uchar t; + }; + class Edge + { + public: + int dst; + int next; + TWeight weight; + }; + + std::vector vtcs; + std::vector edges; + TWeight flow; +}; + +template +GCGraph::GCGraph() +{ + flow = 0; +} +template +GCGraph::GCGraph( unsigned int vtxCount, unsigned int edgeCount ) +{ + create( vtxCount, edgeCount ); +} +template +GCGraph::~GCGraph() +{ +} +template +void GCGraph::create( unsigned int vtxCount, unsigned int edgeCount ) +{ + vtcs.reserve( vtxCount ); + edges.reserve( edgeCount + 2 ); + flow = 0; +} + +template +int GCGraph::addVtx() +{ + Vtx v; + memset( &v, 0, sizeof(Vtx)); + vtcs.push_back(v); + return (int)vtcs.size() - 1; +} + +template +void GCGraph::addEdges( int i, int j, TWeight w, TWeight revw ) +{ + CV_Assert( i>=0 && i<(int)vtcs.size() ); + CV_Assert( j>=0 && j<(int)vtcs.size() ); + CV_Assert( w>=0 && revw>=0 ); + CV_Assert( i != j ); + + if( !edges.size() ) + edges.resize( 2 ); + + Edge fromI, toI; + fromI.dst = j; + fromI.next = vtcs[i].first; + fromI.weight = w; + vtcs[i].first = (int)edges.size(); + edges.push_back( fromI ); + + toI.dst = i; + toI.next = vtcs[j].first; + toI.weight = revw; + vtcs[j].first = (int)edges.size(); + edges.push_back( toI ); +} + +template +void GCGraph::addTermWeights( int i, TWeight sourceW, TWeight sinkW ) +{ + CV_Assert( i>=0 && i<(int)vtcs.size() ); + + TWeight dw = vtcs[i].weight; + if( dw > 0 ) + sourceW += dw; + else + sinkW -= dw; + flow += (sourceW < sinkW) ? sourceW : sinkW; + vtcs[i].weight = sourceW - sinkW; +} + +template +TWeight GCGraph::maxFlow() +{ + CV_Assert(!vtcs.empty()); + CV_Assert(!edges.empty()); + const int TERMINAL = -1, ORPHAN = -2; + Vtx stub, *nilNode = &stub, *first = nilNode, *last = nilNode; + int curr_ts = 0; + stub.next = nilNode; + Vtx *vtxPtr = &vtcs[0]; + Edge *edgePtr = &edges[0]; + + std::vector orphans; + + // initialize the active queue and the graph vertices + for( int i = 0; i < (int)vtcs.size(); i++ ) + { + Vtx* v = vtxPtr + i; + v->ts = 0; + if( v->weight != 0 ) + { + last = last->next = v; + v->dist = 1; + v->parent = TERMINAL; + v->t = v->weight < 0; + } + else + v->parent = 0; + } + first = first->next; + last->next = nilNode; + nilNode->next = 0; + + // run the search-path -> augment-graph -> restore-trees loop + for(;;) + { + Vtx* v, *u; + int e0 = -1, ei = 0, ej = 0; + TWeight minWeight, weight; + uchar vt; + + // grow S & T search trees, find an edge connecting them + while( first != nilNode ) + { + v = first; + if( v->parent ) + { + vt = v->t; + for( ei = v->first; ei != 0; ei = edgePtr[ei].next ) + { + if( edgePtr[ei^vt].weight == 0 ) + continue; + u = vtxPtr+edgePtr[ei].dst; + if( !u->parent ) + { + u->t = vt; + u->parent = ei ^ 1; + u->ts = v->ts; + u->dist = v->dist + 1; + if( !u->next ) + { + u->next = nilNode; + last = last->next = u; + } + continue; + } + + if( u->t != vt ) + { + e0 = ei ^ vt; + break; + } + + if( u->dist > v->dist+1 && u->ts <= v->ts ) + { + // reassign the parent + u->parent = ei ^ 1; + u->ts = v->ts; + u->dist = v->dist + 1; + } + } + if( e0 > 0 ) + break; + } + // exclude the vertex from the active list + first = first->next; + v->next = 0; + } + + if( e0 <= 0 ) + break; + + // find the minimum edge weight along the path + minWeight = edgePtr[e0].weight; + CV_Assert( minWeight > 0 ); + // k = 1: source tree, k = 0: destination tree + for( int k = 1; k >= 0; k-- ) + { + for( v = vtxPtr+edgePtr[e0^k].dst;; v = vtxPtr+edgePtr[ei].dst ) + { + if( (ei = v->parent) < 0 ) + break; + weight = edgePtr[ei^k].weight; + minWeight = MIN(minWeight, weight); + CV_Assert( minWeight > 0 ); + } + weight = fabs(v->weight); + minWeight = MIN(minWeight, weight); + CV_Assert( minWeight > 0 ); + } + + // modify weights of the edges along the path and collect orphans + edgePtr[e0].weight -= minWeight; + edgePtr[e0^1].weight += minWeight; + flow += minWeight; + + // k = 1: source tree, k = 0: destination tree + for( int k = 1; k >= 0; k-- ) + { + for( v = vtxPtr+edgePtr[e0^k].dst;; v = vtxPtr+edgePtr[ei].dst ) + { + if( (ei = v->parent) < 0 ) + break; + edgePtr[ei^(k^1)].weight += minWeight; + if( (edgePtr[ei^k].weight -= minWeight) == 0 ) + { + orphans.push_back(v); + v->parent = ORPHAN; + } + } + + v->weight = v->weight + minWeight*(1-k*2); + if( v->weight == 0 ) + { + orphans.push_back(v); + v->parent = ORPHAN; + } + } + + // restore the search trees by finding new parents for the orphans + curr_ts++; + while( !orphans.empty() ) + { + Vtx* v2 = orphans.back(); + orphans.pop_back(); + + int d, minDist = INT_MAX; + e0 = 0; + vt = v2->t; + + for( ei = v2->first; ei != 0; ei = edgePtr[ei].next ) + { + if( edgePtr[ei^(vt^1)].weight == 0 ) + continue; + u = vtxPtr+edgePtr[ei].dst; + if( u->t != vt || u->parent == 0 ) + continue; + // compute the distance to the tree root + for( d = 0;; ) + { + if( u->ts == curr_ts ) + { + d += u->dist; + break; + } + ej = u->parent; + d++; + if( ej < 0 ) + { + if( ej == ORPHAN ) + d = INT_MAX-1; + else + { + u->ts = curr_ts; + u->dist = 1; + } + break; + } + u = vtxPtr+edgePtr[ej].dst; + } + + // update the distance + if( ++d < INT_MAX ) + { + if( d < minDist ) + { + minDist = d; + e0 = ei; + } + for( u = vtxPtr+edgePtr[ei].dst; u->ts != curr_ts; u = vtxPtr+edgePtr[u->parent].dst ) + { + u->ts = curr_ts; + u->dist = --d; + } + } + } + + if( (v2->parent = e0) > 0 ) + { + v2->ts = curr_ts; + v2->dist = minDist; + continue; + } + + /* no parent is found */ + v2->ts = 0; + for( ei = v2->first; ei != 0; ei = edgePtr[ei].next ) + { + u = vtxPtr+edgePtr[ei].dst; + ej = u->parent; + if( u->t != vt || !ej ) + continue; + if( edgePtr[ei^(vt^1)].weight && !u->next ) + { + u->next = nilNode; + last = last->next = u; + } + if( ej > 0 && vtxPtr+edgePtr[ej].dst == v2 ) + { + orphans.push_back(u); + u->parent = ORPHAN; + } + } + } + } + return flow; +} + +template +bool GCGraph::inSourceSegment( int i ) +{ + CV_Assert( i>=0 && i<(int)vtcs.size() ); + return vtcs[i].t == 0; +} + +}} // namespace detail, cv + + +//! @endcond + +#endif // OPENCV_IMGPROC_DETAIL_GCGRAPH_HPP diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/imgproc/hal/hal.hpp b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/imgproc/hal/hal.hpp new file mode 100644 index 0000000..ac20725 --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/imgproc/hal/hal.hpp @@ -0,0 +1,241 @@ +#ifndef CV_IMGPROC_HAL_HPP +#define CV_IMGPROC_HAL_HPP + +#include "opencv2/core/cvdef.h" +#include "opencv2/core/cvstd.hpp" +#include "opencv2/core/hal/interface.h" + +namespace cv { namespace hal { + +//! @addtogroup imgproc_hal_functions +//! @{ + +//--------------------------- +//! @cond IGNORED + +struct CV_EXPORTS Filter2D +{ + CV_DEPRECATED static Ptr create(uchar * , size_t , int , + int , int , + int , int , + int , int , + int , double , + int , int , + bool , bool ); + virtual void apply(uchar * , size_t , + uchar * , size_t , + int , int , + int , int , + int , int ) = 0; + virtual ~Filter2D() {} +}; + +struct CV_EXPORTS SepFilter2D +{ + CV_DEPRECATED static Ptr create(int , int , int , + uchar * , int , + uchar * , int , + int , int , + double , int ); + virtual void apply(uchar * , size_t , + uchar * , size_t , + int , int , + int , int , + int , int ) = 0; + virtual ~SepFilter2D() {} +}; + + +struct CV_EXPORTS Morph +{ + CV_DEPRECATED static Ptr create(int , int , int , int , int , + int , uchar * , size_t , + int , int , + int , int , + int , const double *, + int , bool , bool ); + virtual void apply(uchar * , size_t , uchar * , size_t , int , int , + int , int , int , int , + int , int , int , int ) = 0; + virtual ~Morph() {} +}; + +//! @endcond +//--------------------------- + +CV_EXPORTS void filter2D(int stype, int dtype, int kernel_type, + uchar * src_data, size_t src_step, + uchar * dst_data, size_t dst_step, + int width, int height, + int full_width, int full_height, + int offset_x, int offset_y, + uchar * kernel_data, size_t kernel_step, + int kernel_width, int kernel_height, + int anchor_x, int anchor_y, + double delta, int borderType, + bool isSubmatrix); + +CV_EXPORTS void sepFilter2D(int stype, int dtype, int ktype, + uchar * src_data, size_t src_step, + uchar * dst_data, size_t dst_step, + int width, int height, + int full_width, int full_height, + int offset_x, int offset_y, + uchar * kernelx_data, int kernelx_len, + uchar * kernely_data, int kernely_len, + int anchor_x, int anchor_y, + double delta, int borderType); + +CV_EXPORTS void morph(int op, int src_type, int dst_type, + uchar * src_data, size_t src_step, + uchar * dst_data, size_t dst_step, + int width, int height, + int roi_width, int roi_height, int roi_x, int roi_y, + int roi_width2, int roi_height2, int roi_x2, int roi_y2, + int kernel_type, uchar * kernel_data, size_t kernel_step, + int kernel_width, int kernel_height, int anchor_x, int anchor_y, + int borderType, const double borderValue[4], + int iterations, bool isSubmatrix); + + +CV_EXPORTS void resize(int src_type, + const uchar * src_data, size_t src_step, int src_width, int src_height, + uchar * dst_data, size_t dst_step, int dst_width, int dst_height, + double inv_scale_x, double inv_scale_y, int interpolation); + +CV_EXPORTS void warpAffine(int src_type, + const uchar * src_data, size_t src_step, int src_width, int src_height, + uchar * dst_data, size_t dst_step, int dst_width, int dst_height, + const double M[6], int interpolation, int borderType, const double borderValue[4]); + +CV_EXPORTS void warpPerspective(int src_type, + const uchar * src_data, size_t src_step, int src_width, int src_height, + uchar * dst_data, size_t dst_step, int dst_width, int dst_height, + const double M[9], int interpolation, int borderType, const double borderValue[4]); + +CV_EXPORTS void cvtBGRtoBGR(const uchar * src_data, size_t src_step, + uchar * dst_data, size_t dst_step, + int width, int height, + int depth, int scn, int dcn, bool swapBlue); + +CV_EXPORTS void cvtBGRtoBGR5x5(const uchar * src_data, size_t src_step, + uchar * dst_data, size_t dst_step, + int width, int height, + int scn, bool swapBlue, int greenBits); + +CV_EXPORTS void cvtBGR5x5toBGR(const uchar * src_data, size_t src_step, + uchar * dst_data, size_t dst_step, + int width, int height, + int dcn, bool swapBlue, int greenBits); + +CV_EXPORTS void cvtBGRtoGray(const uchar * src_data, size_t src_step, + uchar * dst_data, size_t dst_step, + int width, int height, + int depth, int scn, bool swapBlue); + +CV_EXPORTS void cvtGraytoBGR(const uchar * src_data, size_t src_step, + uchar * dst_data, size_t dst_step, + int width, int height, + int depth, int dcn); + +CV_EXPORTS void cvtBGR5x5toGray(const uchar * src_data, size_t src_step, + uchar * dst_data, size_t dst_step, + int width, int height, + int greenBits); + +CV_EXPORTS void cvtGraytoBGR5x5(const uchar * src_data, size_t src_step, + uchar * dst_data, size_t dst_step, + int width, int height, + int greenBits); +CV_EXPORTS void cvtBGRtoYUV(const uchar * src_data, size_t src_step, + uchar * dst_data, size_t dst_step, + int width, int height, + int depth, int scn, bool swapBlue, bool isCbCr); + +CV_EXPORTS void cvtYUVtoBGR(const uchar * src_data, size_t src_step, + uchar * dst_data, size_t dst_step, + int width, int height, + int depth, int dcn, bool swapBlue, bool isCbCr); + +CV_EXPORTS void cvtBGRtoXYZ(const uchar * src_data, size_t src_step, + uchar * dst_data, size_t dst_step, + int width, int height, + int depth, int scn, bool swapBlue); + +CV_EXPORTS void cvtXYZtoBGR(const uchar * src_data, size_t src_step, + uchar * dst_data, size_t dst_step, + int width, int height, + int depth, int dcn, bool swapBlue); + +CV_EXPORTS void cvtBGRtoHSV(const uchar * src_data, size_t src_step, + uchar * dst_data, size_t dst_step, + int width, int height, + int depth, int scn, bool swapBlue, bool isFullRange, bool isHSV); + +CV_EXPORTS void cvtHSVtoBGR(const uchar * src_data, size_t src_step, + uchar * dst_data, size_t dst_step, + int width, int height, + int depth, int dcn, bool swapBlue, bool isFullRange, bool isHSV); + +CV_EXPORTS void cvtBGRtoLab(const uchar * src_data, size_t src_step, + uchar * dst_data, size_t dst_step, + int width, int height, + int depth, int scn, bool swapBlue, bool isLab, bool srgb); + +CV_EXPORTS void cvtLabtoBGR(const uchar * src_data, size_t src_step, + uchar * dst_data, size_t dst_step, + int width, int height, + int depth, int dcn, bool swapBlue, bool isLab, bool srgb); + +CV_EXPORTS void cvtTwoPlaneYUVtoBGR(const uchar * src_data, size_t src_step, + uchar * dst_data, size_t dst_step, + int dst_width, int dst_height, + int dcn, bool swapBlue, int uIdx); + +//! Separate Y and UV planes +CV_EXPORTS void cvtTwoPlaneYUVtoBGR(const uchar * y_data, const uchar * uv_data, size_t src_step, + uchar * dst_data, size_t dst_step, + int dst_width, int dst_height, + int dcn, bool swapBlue, int uIdx); + +CV_EXPORTS void cvtThreePlaneYUVtoBGR(const uchar * src_data, size_t src_step, + uchar * dst_data, size_t dst_step, + int dst_width, int dst_height, + int dcn, bool swapBlue, int uIdx); + +CV_EXPORTS void cvtBGRtoThreePlaneYUV(const uchar * src_data, size_t src_step, + uchar * dst_data, size_t dst_step, + int width, int height, + int scn, bool swapBlue, int uIdx); + +//! Separate Y and UV planes +CV_EXPORTS void cvtBGRtoTwoPlaneYUV(const uchar * src_data, size_t src_step, + uchar * y_data, uchar * uv_data, size_t dst_step, + int width, int height, + int scn, bool swapBlue, int uIdx); + +CV_EXPORTS void cvtOnePlaneYUVtoBGR(const uchar * src_data, size_t src_step, + uchar * dst_data, size_t dst_step, + int width, int height, + int dcn, bool swapBlue, int uIdx, int ycn); + +CV_EXPORTS void cvtRGBAtoMultipliedRGBA(const uchar * src_data, size_t src_step, + uchar * dst_data, size_t dst_step, + int width, int height); + +CV_EXPORTS void cvtMultipliedRGBAtoRGBA(const uchar * src_data, size_t src_step, + uchar * dst_data, size_t dst_step, + int width, int height); + +CV_EXPORTS void integral(int depth, int sdepth, int sqdepth, + const uchar* src, size_t srcstep, + uchar* sum, size_t sumstep, + uchar* sqsum, size_t sqsumstep, + uchar* tilted, size_t tstep, + int width, int height, int cn); + +//! @} + +}} + +#endif // CV_IMGPROC_HAL_HPP diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/imgproc/hal/interface.h b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/imgproc/hal/interface.h new file mode 100644 index 0000000..f8dbcfe --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/imgproc/hal/interface.h @@ -0,0 +1,46 @@ +#ifndef OPENCV_IMGPROC_HAL_INTERFACE_H +#define OPENCV_IMGPROC_HAL_INTERFACE_H + +//! @addtogroup imgproc_hal_interface +//! @{ + +//! @name Interpolation modes +//! @sa cv::InterpolationFlags +//! @{ +#define CV_HAL_INTER_NEAREST 0 +#define CV_HAL_INTER_LINEAR 1 +#define CV_HAL_INTER_CUBIC 2 +#define CV_HAL_INTER_AREA 3 +#define CV_HAL_INTER_LANCZOS4 4 +//! @} + +//! @name Morphology operations +//! @sa cv::MorphTypes +//! @{ +#define CV_HAL_MORPH_ERODE 0 +#define CV_HAL_MORPH_DILATE 1 +//! @} + +//! @name Threshold types +//! @sa cv::ThresholdTypes +//! @{ +#define CV_HAL_THRESH_BINARY 0 +#define CV_HAL_THRESH_BINARY_INV 1 +#define CV_HAL_THRESH_TRUNC 2 +#define CV_HAL_THRESH_TOZERO 3 +#define CV_HAL_THRESH_TOZERO_INV 4 +#define CV_HAL_THRESH_MASK 7 +#define CV_HAL_THRESH_OTSU 8 +#define CV_HAL_THRESH_TRIANGLE 16 +//! @} + +//! @name Adaptive threshold algorithm +//! @sa cv::AdaptiveThresholdTypes +//! @{ +#define CV_HAL_ADAPTIVE_THRESH_MEAN_C 0 +#define CV_HAL_ADAPTIVE_THRESH_GAUSSIAN_C 1 +//! @} + +//! @} + +#endif diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/imgproc/imgproc.hpp b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/imgproc/imgproc.hpp new file mode 100644 index 0000000..4175bd0 --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/imgproc/imgproc.hpp @@ -0,0 +1,48 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Copyright (C) 2013, OpenCV Foundation, all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifdef __OPENCV_BUILD +#error this is a compatibility header which should not be used inside the OpenCV library +#endif + +#include "opencv2/imgproc.hpp" diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/imgproc/imgproc_c.h b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/imgproc/imgproc_c.h new file mode 100644 index 0000000..86dc119 --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/imgproc/imgproc_c.h @@ -0,0 +1,1177 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef OPENCV_IMGPROC_IMGPROC_C_H +#define OPENCV_IMGPROC_IMGPROC_C_H + +#include "opencv2/imgproc/types_c.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/** @addtogroup imgproc_c +@{ +*/ + +/*********************** Background statistics accumulation *****************************/ + +/** @brief Adds image to accumulator +@see cv::accumulate +*/ +CVAPI(void) cvAcc( const CvArr* image, CvArr* sum, + const CvArr* mask CV_DEFAULT(NULL) ); + +/** @brief Adds squared image to accumulator +@see cv::accumulateSquare +*/ +CVAPI(void) cvSquareAcc( const CvArr* image, CvArr* sqsum, + const CvArr* mask CV_DEFAULT(NULL) ); + +/** @brief Adds a product of two images to accumulator +@see cv::accumulateProduct +*/ +CVAPI(void) cvMultiplyAcc( const CvArr* image1, const CvArr* image2, CvArr* acc, + const CvArr* mask CV_DEFAULT(NULL) ); + +/** @brief Adds image to accumulator with weights: acc = acc*(1-alpha) + image*alpha +@see cv::accumulateWeighted +*/ +CVAPI(void) cvRunningAvg( const CvArr* image, CvArr* acc, double alpha, + const CvArr* mask CV_DEFAULT(NULL) ); + +/****************************************************************************************\ +* Image Processing * +\****************************************************************************************/ + +/** Copies source 2D array inside of the larger destination array and + makes a border of the specified type (IPL_BORDER_*) around the copied area. */ +CVAPI(void) cvCopyMakeBorder( const CvArr* src, CvArr* dst, CvPoint offset, + int bordertype, CvScalar value CV_DEFAULT(cvScalarAll(0))); + +/** @brief Smooths the image in one of several ways. + +@param src The source image +@param dst The destination image +@param smoothtype Type of the smoothing, see SmoothMethod_c +@param size1 The first parameter of the smoothing operation, the aperture width. Must be a +positive odd number (1, 3, 5, ...) +@param size2 The second parameter of the smoothing operation, the aperture height. Ignored by +CV_MEDIAN and CV_BILATERAL methods. In the case of simple scaled/non-scaled and Gaussian blur if +size2 is zero, it is set to size1. Otherwise it must be a positive odd number. +@param sigma1 In the case of a Gaussian parameter this parameter may specify Gaussian \f$\sigma\f$ +(standard deviation). If it is zero, it is calculated from the kernel size: +\f[\sigma = 0.3 (n/2 - 1) + 0.8 \quad \text{where} \quad n= \begin{array}{l l} \mbox{\texttt{size1} for horizontal kernel} \\ \mbox{\texttt{size2} for vertical kernel} \end{array}\f] +Using standard sigma for small kernels ( \f$3\times 3\f$ to \f$7\times 7\f$ ) gives better speed. If +sigma1 is not zero, while size1 and size2 are zeros, the kernel size is calculated from the +sigma (to provide accurate enough operation). +@param sigma2 additional parameter for bilateral filtering + +@see cv::GaussianBlur, cv::blur, cv::medianBlur, cv::bilateralFilter. + */ +CVAPI(void) cvSmooth( const CvArr* src, CvArr* dst, + int smoothtype CV_DEFAULT(CV_GAUSSIAN), + int size1 CV_DEFAULT(3), + int size2 CV_DEFAULT(0), + double sigma1 CV_DEFAULT(0), + double sigma2 CV_DEFAULT(0)); + +/** @brief Convolves an image with the kernel. + +@param src input image. +@param dst output image of the same size and the same number of channels as src. +@param kernel convolution kernel (or rather a correlation kernel), a single-channel floating point +matrix; if you want to apply different kernels to different channels, split the image into +separate color planes using split and process them individually. +@param anchor anchor of the kernel that indicates the relative position of a filtered point within +the kernel; the anchor should lie within the kernel; default value (-1,-1) means that the anchor +is at the kernel center. + +@see cv::filter2D + */ +CVAPI(void) cvFilter2D( const CvArr* src, CvArr* dst, const CvMat* kernel, + CvPoint anchor CV_DEFAULT(cvPoint(-1,-1))); + +/** @brief Finds integral image: SUM(X,Y) = sum(x \texttt{hist1}(I)\)}{\frac{\texttt{hist2}(I) \cdot \texttt{scale}}{\texttt{hist1}(I)}}{if \(\texttt{hist1}(I) \ne 0\) and \(\texttt{hist2}(I) \le \texttt{hist1}(I)\)}\f] + +@param hist1 First histogram (the divisor). +@param hist2 Second histogram. +@param dst_hist Destination histogram. +@param scale Scale factor for the destination histogram. + */ +CVAPI(void) cvCalcProbDensity( const CvHistogram* hist1, const CvHistogram* hist2, + CvHistogram* dst_hist, double scale CV_DEFAULT(255) ); + +/** @brief equalizes histogram of 8-bit single-channel image +@see cv::equalizeHist +*/ +CVAPI(void) cvEqualizeHist( const CvArr* src, CvArr* dst ); + + +/** @brief Applies distance transform to binary image +@see cv::distanceTransform +*/ +CVAPI(void) cvDistTransform( const CvArr* src, CvArr* dst, + int distance_type CV_DEFAULT(CV_DIST_L2), + int mask_size CV_DEFAULT(3), + const float* mask CV_DEFAULT(NULL), + CvArr* labels CV_DEFAULT(NULL), + int labelType CV_DEFAULT(CV_DIST_LABEL_CCOMP)); + + +/** @brief Applies fixed-level threshold to grayscale image. + + This is a basic operation applied before retrieving contours +@see cv::threshold +*/ +CVAPI(double) cvThreshold( const CvArr* src, CvArr* dst, + double threshold, double max_value, + int threshold_type ); + +/** @brief Applies adaptive threshold to grayscale image. + + The two parameters for methods CV_ADAPTIVE_THRESH_MEAN_C and + CV_ADAPTIVE_THRESH_GAUSSIAN_C are: + neighborhood size (3, 5, 7 etc.), + and a constant subtracted from mean (...,-3,-2,-1,0,1,2,3,...) +@see cv::adaptiveThreshold +*/ +CVAPI(void) cvAdaptiveThreshold( const CvArr* src, CvArr* dst, double max_value, + int adaptive_method CV_DEFAULT(CV_ADAPTIVE_THRESH_MEAN_C), + int threshold_type CV_DEFAULT(CV_THRESH_BINARY), + int block_size CV_DEFAULT(3), + double param1 CV_DEFAULT(5)); + +/** @brief Fills the connected component until the color difference gets large enough +@see cv::floodFill +*/ +CVAPI(void) cvFloodFill( CvArr* image, CvPoint seed_point, + CvScalar new_val, CvScalar lo_diff CV_DEFAULT(cvScalarAll(0)), + CvScalar up_diff CV_DEFAULT(cvScalarAll(0)), + CvConnectedComp* comp CV_DEFAULT(NULL), + int flags CV_DEFAULT(4), + CvArr* mask CV_DEFAULT(NULL)); + +/****************************************************************************************\ +* Feature detection * +\****************************************************************************************/ + +/** @brief Runs canny edge detector +@see cv::Canny +*/ +CVAPI(void) cvCanny( const CvArr* image, CvArr* edges, double threshold1, + double threshold2, int aperture_size CV_DEFAULT(3) ); + +/** @brief Calculates constraint image for corner detection + + Dx^2 * Dyy + Dxx * Dy^2 - 2 * Dx * Dy * Dxy. + Applying threshold to the result gives coordinates of corners +@see cv::preCornerDetect +*/ +CVAPI(void) cvPreCornerDetect( const CvArr* image, CvArr* corners, + int aperture_size CV_DEFAULT(3) ); + +/** @brief Calculates eigen values and vectors of 2x2 + gradient covariation matrix at every image pixel +@see cv::cornerEigenValsAndVecs +*/ +CVAPI(void) cvCornerEigenValsAndVecs( const CvArr* image, CvArr* eigenvv, + int block_size, int aperture_size CV_DEFAULT(3) ); + +/** @brief Calculates minimal eigenvalue for 2x2 gradient covariation matrix at + every image pixel +@see cv::cornerMinEigenVal +*/ +CVAPI(void) cvCornerMinEigenVal( const CvArr* image, CvArr* eigenval, + int block_size, int aperture_size CV_DEFAULT(3) ); + +/** @brief Harris corner detector: + + Calculates det(M) - k*(trace(M)^2), where M is 2x2 gradient covariation matrix for each pixel +@see cv::cornerHarris +*/ +CVAPI(void) cvCornerHarris( const CvArr* image, CvArr* harris_response, + int block_size, int aperture_size CV_DEFAULT(3), + double k CV_DEFAULT(0.04) ); + +/** @brief Adjust corner position using some sort of gradient search +@see cv::cornerSubPix +*/ +CVAPI(void) cvFindCornerSubPix( const CvArr* image, CvPoint2D32f* corners, + int count, CvSize win, CvSize zero_zone, + CvTermCriteria criteria ); + +/** @brief Finds a sparse set of points within the selected region + that seem to be easy to track +@see cv::goodFeaturesToTrack +*/ +CVAPI(void) cvGoodFeaturesToTrack( const CvArr* image, CvArr* eig_image, + CvArr* temp_image, CvPoint2D32f* corners, + int* corner_count, double quality_level, + double min_distance, + const CvArr* mask CV_DEFAULT(NULL), + int block_size CV_DEFAULT(3), + int use_harris CV_DEFAULT(0), + double k CV_DEFAULT(0.04) ); + +/** @brief Finds lines on binary image using one of several methods. + + line_storage is either memory storage or 1 x _max number of lines_ CvMat, its + number of columns is changed by the function. + method is one of CV_HOUGH_*; + rho, theta and threshold are used for each of those methods; + param1 ~ line length, param2 ~ line gap - for probabilistic, + param1 ~ srn, param2 ~ stn - for multi-scale +@see cv::HoughLines +*/ +CVAPI(CvSeq*) cvHoughLines2( CvArr* image, void* line_storage, int method, + double rho, double theta, int threshold, + double param1 CV_DEFAULT(0), double param2 CV_DEFAULT(0), + double min_theta CV_DEFAULT(0), double max_theta CV_DEFAULT(CV_PI)); + +/** @brief Finds circles in the image +@see cv::HoughCircles +*/ +CVAPI(CvSeq*) cvHoughCircles( CvArr* image, void* circle_storage, + int method, double dp, double min_dist, + double param1 CV_DEFAULT(100), + double param2 CV_DEFAULT(100), + int min_radius CV_DEFAULT(0), + int max_radius CV_DEFAULT(0)); + +/** @brief Fits a line into set of 2d or 3d points in a robust way (M-estimator technique) +@see cv::fitLine +*/ +CVAPI(void) cvFitLine( const CvArr* points, int dist_type, double param, + double reps, double aeps, float* line ); + +/****************************************************************************************\ +* Drawing * +\****************************************************************************************/ + +/****************************************************************************************\ +* Drawing functions work with images/matrices of arbitrary type. * +* For color images the channel order is BGR[A] * +* Antialiasing is supported only for 8-bit image now. * +* All the functions include parameter color that means rgb value (that may be * +* constructed with CV_RGB macro) for color images and brightness * +* for grayscale images. * +* If a drawn figure is partially or completely outside of the image, it is clipped.* +\****************************************************************************************/ + +#define CV_FILLED -1 + +#define CV_AA 16 + +/** @brief Draws 4-connected, 8-connected or antialiased line segment connecting two points +@see cv::line +*/ +CVAPI(void) cvLine( CvArr* img, CvPoint pt1, CvPoint pt2, + CvScalar color, int thickness CV_DEFAULT(1), + int line_type CV_DEFAULT(8), int shift CV_DEFAULT(0) ); + +/** @brief Draws a rectangle given two opposite corners of the rectangle (pt1 & pt2) + + if thickness<0 (e.g. thickness == CV_FILLED), the filled box is drawn +@see cv::rectangle +*/ +CVAPI(void) cvRectangle( CvArr* img, CvPoint pt1, CvPoint pt2, + CvScalar color, int thickness CV_DEFAULT(1), + int line_type CV_DEFAULT(8), + int shift CV_DEFAULT(0)); + +/** @brief Draws a rectangle specified by a CvRect structure +@see cv::rectangle +*/ +CVAPI(void) cvRectangleR( CvArr* img, CvRect r, + CvScalar color, int thickness CV_DEFAULT(1), + int line_type CV_DEFAULT(8), + int shift CV_DEFAULT(0)); + + +/** @brief Draws a circle with specified center and radius. + + Thickness works in the same way as with cvRectangle +@see cv::circle +*/ +CVAPI(void) cvCircle( CvArr* img, CvPoint center, int radius, + CvScalar color, int thickness CV_DEFAULT(1), + int line_type CV_DEFAULT(8), int shift CV_DEFAULT(0)); + +/** @brief Draws ellipse outline, filled ellipse, elliptic arc or filled elliptic sector + + depending on _thickness_, _start_angle_ and _end_angle_ parameters. The resultant figure + is rotated by _angle_. All the angles are in degrees +@see cv::ellipse +*/ +CVAPI(void) cvEllipse( CvArr* img, CvPoint center, CvSize axes, + double angle, double start_angle, double end_angle, + CvScalar color, int thickness CV_DEFAULT(1), + int line_type CV_DEFAULT(8), int shift CV_DEFAULT(0)); + +CV_INLINE void cvEllipseBox( CvArr* img, CvBox2D box, CvScalar color, + int thickness CV_DEFAULT(1), + int line_type CV_DEFAULT(8), int shift CV_DEFAULT(0) ) +{ + CvSize axes = cvSize( + cvRound(box.size.width*0.5), + cvRound(box.size.height*0.5) + ); + + cvEllipse( img, cvPointFrom32f( box.center ), axes, box.angle, + 0, 360, color, thickness, line_type, shift ); +} + +/** @brief Fills convex or monotonous polygon. +@see cv::fillConvexPoly +*/ +CVAPI(void) cvFillConvexPoly( CvArr* img, const CvPoint* pts, int npts, CvScalar color, + int line_type CV_DEFAULT(8), int shift CV_DEFAULT(0)); + +/** @brief Fills an area bounded by one or more arbitrary polygons +@see cv::fillPoly +*/ +CVAPI(void) cvFillPoly( CvArr* img, CvPoint** pts, const int* npts, + int contours, CvScalar color, + int line_type CV_DEFAULT(8), int shift CV_DEFAULT(0) ); + +/** @brief Draws one or more polygonal curves +@see cv::polylines +*/ +CVAPI(void) cvPolyLine( CvArr* img, CvPoint** pts, const int* npts, int contours, + int is_closed, CvScalar color, int thickness CV_DEFAULT(1), + int line_type CV_DEFAULT(8), int shift CV_DEFAULT(0) ); + +#define cvDrawRect cvRectangle +#define cvDrawLine cvLine +#define cvDrawCircle cvCircle +#define cvDrawEllipse cvEllipse +#define cvDrawPolyLine cvPolyLine + +/** @brief Clips the line segment connecting *pt1 and *pt2 + by the rectangular window + + (0<=xptr will point to pt1 (or pt2, see left_to_right description) location in +the image. Returns the number of pixels on the line between the ending points. +@see cv::LineIterator +*/ +CVAPI(int) cvInitLineIterator( const CvArr* image, CvPoint pt1, CvPoint pt2, + CvLineIterator* line_iterator, + int connectivity CV_DEFAULT(8), + int left_to_right CV_DEFAULT(0)); + +#define CV_NEXT_LINE_POINT( line_iterator ) \ +{ \ + int _line_iterator_mask = (line_iterator).err < 0 ? -1 : 0; \ + (line_iterator).err += (line_iterator).minus_delta + \ + ((line_iterator).plus_delta & _line_iterator_mask); \ + (line_iterator).ptr += (line_iterator).minus_step + \ + ((line_iterator).plus_step & _line_iterator_mask); \ +} + + +#define CV_FONT_HERSHEY_SIMPLEX 0 +#define CV_FONT_HERSHEY_PLAIN 1 +#define CV_FONT_HERSHEY_DUPLEX 2 +#define CV_FONT_HERSHEY_COMPLEX 3 +#define CV_FONT_HERSHEY_TRIPLEX 4 +#define CV_FONT_HERSHEY_COMPLEX_SMALL 5 +#define CV_FONT_HERSHEY_SCRIPT_SIMPLEX 6 +#define CV_FONT_HERSHEY_SCRIPT_COMPLEX 7 + +#define CV_FONT_ITALIC 16 + +#define CV_FONT_VECTOR0 CV_FONT_HERSHEY_SIMPLEX + + +/** Font structure */ +typedef struct CvFont +{ + const char* nameFont; //Qt:nameFont + CvScalar color; //Qt:ColorFont -> cvScalar(blue_component, green_component, red_component[, alpha_component]) + int font_face; //Qt: bool italic /** =CV_FONT_* */ + const int* ascii; //!< font data and metrics + const int* greek; + const int* cyrillic; + float hscale, vscale; + float shear; //!< slope coefficient: 0 - normal, >0 - italic + int thickness; //!< Qt: weight /** letters thickness */ + float dx; //!< horizontal interval between letters + int line_type; //!< Qt: PointSize +} +CvFont; + +/** @brief Initializes font structure (OpenCV 1.x API). + +The function initializes the font structure that can be passed to text rendering functions. + +@param font Pointer to the font structure initialized by the function +@param font_face Font name identifier. See cv::HersheyFonts and corresponding old CV_* identifiers. +@param hscale Horizontal scale. If equal to 1.0f , the characters have the original width +depending on the font type. If equal to 0.5f , the characters are of half the original width. +@param vscale Vertical scale. If equal to 1.0f , the characters have the original height depending +on the font type. If equal to 0.5f , the characters are of half the original height. +@param shear Approximate tangent of the character slope relative to the vertical line. A zero +value means a non-italic font, 1.0f means about a 45 degree slope, etc. +@param thickness Thickness of the text strokes +@param line_type Type of the strokes, see line description + +@sa cvPutText + */ +CVAPI(void) cvInitFont( CvFont* font, int font_face, + double hscale, double vscale, + double shear CV_DEFAULT(0), + int thickness CV_DEFAULT(1), + int line_type CV_DEFAULT(8)); + +CV_INLINE CvFont cvFont( double scale, int thickness CV_DEFAULT(1) ) +{ + CvFont font; + cvInitFont( &font, CV_FONT_HERSHEY_PLAIN, scale, scale, 0, thickness, CV_AA ); + return font; +} + +/** @brief Renders text stroke with specified font and color at specified location. + CvFont should be initialized with cvInitFont +@see cvInitFont, cvGetTextSize, cvFont, cv::putText +*/ +CVAPI(void) cvPutText( CvArr* img, const char* text, CvPoint org, + const CvFont* font, CvScalar color ); + +/** @brief Calculates bounding box of text stroke (useful for alignment) +@see cv::getTextSize +*/ +CVAPI(void) cvGetTextSize( const char* text_string, const CvFont* font, + CvSize* text_size, int* baseline ); + +/** @brief Unpacks color value + +if arrtype is CV_8UC?, _color_ is treated as packed color value, otherwise the first channels +(depending on arrtype) of destination scalar are set to the same value = _color_ +*/ +CVAPI(CvScalar) cvColorToScalar( double packed_color, int arrtype ); + +/** @brief Returns the polygon points which make up the given ellipse. + +The ellipse is define by the box of size 'axes' rotated 'angle' around the 'center'. A partial +sweep of the ellipse arc can be done by specifying arc_start and arc_end to be something other than +0 and 360, respectively. The input array 'pts' must be large enough to hold the result. The total +number of points stored into 'pts' is returned by this function. +@see cv::ellipse2Poly +*/ +CVAPI(int) cvEllipse2Poly( CvPoint center, CvSize axes, + int angle, int arc_start, int arc_end, CvPoint * pts, int delta ); + +/** @brief Draws contour outlines or filled interiors on the image +@see cv::drawContours +*/ +CVAPI(void) cvDrawContours( CvArr *img, CvSeq* contour, + CvScalar external_color, CvScalar hole_color, + int max_level, int thickness CV_DEFAULT(1), + int line_type CV_DEFAULT(8), + CvPoint offset CV_DEFAULT(cvPoint(0,0))); + +/** @} */ + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/imgproc/types_c.h b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/imgproc/types_c.h new file mode 100644 index 0000000..d3e55f5 --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/imgproc/types_c.h @@ -0,0 +1,659 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef OPENCV_IMGPROC_TYPES_C_H +#define OPENCV_IMGPROC_TYPES_C_H + +#include "opencv2/core/core_c.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/** @addtogroup imgproc_c + @{ +*/ + +/** Connected component structure */ +typedef struct CvConnectedComp +{ + double area; /** DBL_EPSILON ? 1./std::sqrt(am00) : 0; + } + operator cv::Moments() const + { + return cv::Moments(m00, m10, m01, m20, m11, m02, m30, m21, m12, m03); + } +#endif +} +CvMoments; + +#ifdef __cplusplus +} // extern "C" + +CV_INLINE CvMoments cvMoments() +{ +#if !defined(CV__ENABLE_C_API_CTORS) + CvMoments self = CV_STRUCT_INITIALIZER; return self; +#else + return CvMoments(); +#endif +} + +CV_INLINE CvMoments cvMoments(const cv::Moments& m) +{ +#if !defined(CV__ENABLE_C_API_CTORS) + double am00 = std::abs(m.m00); + CvMoments self = { + m.m00, m.m10, m.m01, m.m20, m.m11, m.m02, m.m30, m.m21, m.m12, m.m03, + m.mu20, m.mu11, m.mu02, m.mu30, m.mu21, m.mu12, m.mu03, + am00 > DBL_EPSILON ? 1./std::sqrt(am00) : 0 + }; + return self; +#else + return CvMoments(m); +#endif +} + +extern "C" { +#endif // __cplusplus + +/** Hu invariants */ +typedef struct CvHuMoments +{ + double hu1, hu2, hu3, hu4, hu5, hu6, hu7; /**< Hu invariants */ +} +CvHuMoments; + +/** Template matching methods */ +enum +{ + CV_TM_SQDIFF =0, + CV_TM_SQDIFF_NORMED =1, + CV_TM_CCORR =2, + CV_TM_CCORR_NORMED =3, + CV_TM_CCOEFF =4, + CV_TM_CCOEFF_NORMED =5 +}; + +typedef float (CV_CDECL * CvDistanceFunction)( const float* a, const float* b, void* user_param ); + +/** Contour retrieval modes */ +enum +{ + CV_RETR_EXTERNAL=0, + CV_RETR_LIST=1, + CV_RETR_CCOMP=2, + CV_RETR_TREE=3, + CV_RETR_FLOODFILL=4 +}; + +/** Contour approximation methods */ +enum +{ + CV_CHAIN_CODE=0, + CV_CHAIN_APPROX_NONE=1, + CV_CHAIN_APPROX_SIMPLE=2, + CV_CHAIN_APPROX_TC89_L1=3, + CV_CHAIN_APPROX_TC89_KCOS=4, + CV_LINK_RUNS=5 +}; + +/* +Internal structure that is used for sequential retrieving contours from the image. +It supports both hierarchical and plane variants of Suzuki algorithm. +*/ +typedef struct _CvContourScanner* CvContourScanner; + +/** Freeman chain reader state */ +typedef struct CvChainPtReader +{ + CV_SEQ_READER_FIELDS() + char code; + CvPoint pt; + schar deltas[8][2]; +} +CvChainPtReader; + +/** initializes 8-element array for fast access to 3x3 neighborhood of a pixel */ +#define CV_INIT_3X3_DELTAS( deltas, step, nch ) \ + ((deltas)[0] = (nch), (deltas)[1] = -(step) + (nch), \ + (deltas)[2] = -(step), (deltas)[3] = -(step) - (nch), \ + (deltas)[4] = -(nch), (deltas)[5] = (step) - (nch), \ + (deltas)[6] = (step), (deltas)[7] = (step) + (nch)) + + +/** Contour approximation algorithms */ +enum +{ + CV_POLY_APPROX_DP = 0 +}; + +/** Shape matching methods */ +enum +{ + CV_CONTOURS_MATCH_I1 =1, //!< \f[I_1(A,B) = \sum _{i=1...7} \left | \frac{1}{m^A_i} - \frac{1}{m^B_i} \right |\f] + CV_CONTOURS_MATCH_I2 =2, //!< \f[I_2(A,B) = \sum _{i=1...7} \left | m^A_i - m^B_i \right |\f] + CV_CONTOURS_MATCH_I3 =3 //!< \f[I_3(A,B) = \max _{i=1...7} \frac{ \left| m^A_i - m^B_i \right| }{ \left| m^A_i \right| }\f] +}; + +/** Shape orientation */ +enum +{ + CV_CLOCKWISE =1, + CV_COUNTER_CLOCKWISE =2 +}; + + +/** Convexity defect */ +typedef struct CvConvexityDefect +{ + CvPoint* start; /**< point of the contour where the defect begins */ + CvPoint* end; /**< point of the contour where the defect ends */ + CvPoint* depth_point; /**< the farthest from the convex hull point within the defect */ + float depth; /**< distance between the farthest point and the convex hull */ +} CvConvexityDefect; + + +/** Histogram comparison methods */ +enum +{ + CV_COMP_CORREL =0, + CV_COMP_CHISQR =1, + CV_COMP_INTERSECT =2, + CV_COMP_BHATTACHARYYA =3, + CV_COMP_HELLINGER =CV_COMP_BHATTACHARYYA, + CV_COMP_CHISQR_ALT =4, + CV_COMP_KL_DIV =5 +}; + +/** Mask size for distance transform */ +enum +{ + CV_DIST_MASK_3 =3, + CV_DIST_MASK_5 =5, + CV_DIST_MASK_PRECISE =0 +}; + +/** Content of output label array: connected components or pixels */ +enum +{ + CV_DIST_LABEL_CCOMP = 0, + CV_DIST_LABEL_PIXEL = 1 +}; + +/** Distance types for Distance Transform and M-estimators */ +enum +{ + CV_DIST_USER =-1, /**< User defined distance */ + CV_DIST_L1 =1, /**< distance = |x1-x2| + |y1-y2| */ + CV_DIST_L2 =2, /**< the simple euclidean distance */ + CV_DIST_C =3, /**< distance = max(|x1-x2|,|y1-y2|) */ + CV_DIST_L12 =4, /**< L1-L2 metric: distance = 2(sqrt(1+x*x/2) - 1)) */ + CV_DIST_FAIR =5, /**< distance = c^2(|x|/c-log(1+|x|/c)), c = 1.3998 */ + CV_DIST_WELSCH =6, /**< distance = c^2/2(1-exp(-(x/c)^2)), c = 2.9846 */ + CV_DIST_HUBER =7 /**< distance = |x| threshold ? max_value : 0 */ + CV_THRESH_BINARY_INV =1, /**< value = value > threshold ? 0 : max_value */ + CV_THRESH_TRUNC =2, /**< value = value > threshold ? threshold : value */ + CV_THRESH_TOZERO =3, /**< value = value > threshold ? value : 0 */ + CV_THRESH_TOZERO_INV =4, /**< value = value > threshold ? 0 : value */ + CV_THRESH_MASK =7, + CV_THRESH_OTSU =8, /**< use Otsu algorithm to choose the optimal threshold value; + combine the flag with one of the above CV_THRESH_* values */ + CV_THRESH_TRIANGLE =16 /**< use Triangle algorithm to choose the optimal threshold value; + combine the flag with one of the above CV_THRESH_* values, but not + with CV_THRESH_OTSU */ +}; + +/** Adaptive threshold methods */ +enum +{ + CV_ADAPTIVE_THRESH_MEAN_C =0, + CV_ADAPTIVE_THRESH_GAUSSIAN_C =1 +}; + +/** FloodFill flags */ +enum +{ + CV_FLOODFILL_FIXED_RANGE =(1 << 16), + CV_FLOODFILL_MASK_ONLY =(1 << 17) +}; + + +/** Canny edge detector flags */ +enum +{ + CV_CANNY_L2_GRADIENT =(1 << 31) +}; + +/** Variants of a Hough transform */ +enum +{ + CV_HOUGH_STANDARD =0, + CV_HOUGH_PROBABILISTIC =1, + CV_HOUGH_MULTI_SCALE =2, + CV_HOUGH_GRADIENT =3 +}; + + +/* Fast search data structures */ +struct CvFeatureTree; +struct CvLSH; +struct CvLSHOperations; + +/** @} */ + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/opencv.hpp b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/opencv.hpp new file mode 100644 index 0000000..d17b94a --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/opencv.hpp @@ -0,0 +1,95 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009-2010, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef OPENCV_ALL_HPP +#define OPENCV_ALL_HPP + +// File that defines what modules where included during the build of OpenCV +// These are purely the defines of the correct HAVE_OPENCV_modulename values +#include "opencv2/opencv_modules.hpp" + +// Then the list of defines is checked to include the correct headers +// Core library is always included --> without no OpenCV functionality available +#include "opencv2/core.hpp" + +// Then the optional modules are checked +#ifdef HAVE_OPENCV_CALIB3D +#include "opencv2/calib3d.hpp" +#endif +#ifdef HAVE_OPENCV_FEATURES2D +#include "opencv2/features2d.hpp" +#endif +#ifdef HAVE_OPENCV_DNN +#include "opencv2/dnn.hpp" +#endif +#ifdef HAVE_OPENCV_FLANN +#include "opencv2/flann.hpp" +#endif +#ifdef HAVE_OPENCV_HIGHGUI +#include "opencv2/highgui.hpp" +#endif +#ifdef HAVE_OPENCV_IMGCODECS +#include "opencv2/imgcodecs.hpp" +#endif +#ifdef HAVE_OPENCV_IMGPROC +#include "opencv2/imgproc.hpp" +#endif +#ifdef HAVE_OPENCV_ML +#include "opencv2/ml.hpp" +#endif +#ifdef HAVE_OPENCV_OBJDETECT +#include "opencv2/objdetect.hpp" +#endif +#ifdef HAVE_OPENCV_PHOTO +#include "opencv2/photo.hpp" +#endif +#ifdef HAVE_OPENCV_STITCHING +#include "opencv2/stitching.hpp" +#endif +#ifdef HAVE_OPENCV_VIDEO +#include "opencv2/video.hpp" +#endif +#ifdef HAVE_OPENCV_VIDEOIO +#include "opencv2/videoio.hpp" +#endif + +#endif diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/opencv_modules.hpp b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/opencv_modules.hpp new file mode 100644 index 0000000..7eae9da --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/opencv_modules.hpp @@ -0,0 +1,21 @@ +/* + * ** File generated automatically, do not modify ** + * + * This file defines the list of modules available in current build configuration + * + * +*/ + +// This definition means that OpenCV is built with enabled non-free code. +// For example, patented algorithms for non-profit/non-commercial use only. +/* #undef OPENCV_ENABLE_NONFREE */ + +#define HAVE_OPENCV_CORE +#define HAVE_OPENCV_FEATURES2D +#define HAVE_OPENCV_HIGHGUI +#define HAVE_OPENCV_IMGPROC +#define HAVE_OPENCV_PHOTO +#define HAVE_OPENCV_VIDEO +#define HAVE_OPENCV_WORLD + + diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/photo.hpp b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/photo.hpp new file mode 100644 index 0000000..c2e89a3 --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/photo.hpp @@ -0,0 +1,858 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2008-2012, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef OPENCV_PHOTO_HPP +#define OPENCV_PHOTO_HPP + +#include "opencv2/core.hpp" +#include "opencv2/imgproc.hpp" + +/** +@defgroup photo Computational Photography + +This module includes photo processing algorithms +@{ + @defgroup photo_inpaint Inpainting + @defgroup photo_denoise Denoising + @defgroup photo_hdr HDR imaging + +This section describes high dynamic range imaging algorithms namely tonemapping, exposure alignment, +camera calibration with multiple exposures and exposure fusion. + + @defgroup photo_decolor Contrast Preserving Decolorization + +Useful links: + +http://www.cse.cuhk.edu.hk/leojia/projects/color2gray/index.html + + @defgroup photo_clone Seamless Cloning + +Useful links: + +https://www.learnopencv.com/seamless-cloning-using-opencv-python-cpp + + @defgroup photo_render Non-Photorealistic Rendering + +Useful links: + +http://www.inf.ufrgs.br/~eslgastal/DomainTransform + +https://www.learnopencv.com/non-photorealistic-rendering-using-opencv-python-c/ + + @defgroup photo_c C API +@} + */ + +namespace cv +{ + +//! @addtogroup photo +//! @{ + +//! @addtogroup photo_inpaint +//! @{ +//! the inpainting algorithm +enum +{ + INPAINT_NS = 0, //!< Use Navier-Stokes based method + INPAINT_TELEA = 1 //!< Use the algorithm proposed by Alexandru Telea @cite Telea04 +}; + +/** @brief Restores the selected region in an image using the region neighborhood. + +@param src Input 8-bit, 16-bit unsigned or 32-bit float 1-channel or 8-bit 3-channel image. +@param inpaintMask Inpainting mask, 8-bit 1-channel image. Non-zero pixels indicate the area that +needs to be inpainted. +@param dst Output image with the same size and type as src . +@param inpaintRadius Radius of a circular neighborhood of each point inpainted that is considered +by the algorithm. +@param flags Inpainting method that could be cv::INPAINT_NS or cv::INPAINT_TELEA + +The function reconstructs the selected image area from the pixel near the area boundary. The +function may be used to remove dust and scratches from a scanned photo, or to remove undesirable +objects from still images or video. See for more details. + +@note + - An example using the inpainting technique can be found at + opencv_source_code/samples/cpp/inpaint.cpp + - (Python) An example using the inpainting technique can be found at + opencv_source_code/samples/python/inpaint.py + */ +CV_EXPORTS_W void inpaint( InputArray src, InputArray inpaintMask, + OutputArray dst, double inpaintRadius, int flags ); + +//! @} photo_inpaint + +//! @addtogroup photo_denoise +//! @{ + +/** @brief Perform image denoising using Non-local Means Denoising algorithm + with several computational +optimizations. Noise expected to be a gaussian white noise + +@param src Input 8-bit 1-channel, 2-channel, 3-channel or 4-channel image. +@param dst Output image with the same size and type as src . +@param templateWindowSize Size in pixels of the template patch that is used to compute weights. +Should be odd. Recommended value 7 pixels +@param searchWindowSize Size in pixels of the window that is used to compute weighted average for +given pixel. Should be odd. Affect performance linearly: greater searchWindowsSize - greater +denoising time. Recommended value 21 pixels +@param h Parameter regulating filter strength. Big h value perfectly removes noise but also +removes image details, smaller h value preserves details but also preserves some noise + +This function expected to be applied to grayscale images. For colored images look at +fastNlMeansDenoisingColored. Advanced usage of this functions can be manual denoising of colored +image in different colorspaces. Such approach is used in fastNlMeansDenoisingColored by converting +image to CIELAB colorspace and then separately denoise L and AB components with different h +parameter. + */ +CV_EXPORTS_W void fastNlMeansDenoising( InputArray src, OutputArray dst, float h = 3, + int templateWindowSize = 7, int searchWindowSize = 21); + +/** @brief Perform image denoising using Non-local Means Denoising algorithm + with several computational +optimizations. Noise expected to be a gaussian white noise + +@param src Input 8-bit or 16-bit (only with NORM_L1) 1-channel, +2-channel, 3-channel or 4-channel image. +@param dst Output image with the same size and type as src . +@param templateWindowSize Size in pixels of the template patch that is used to compute weights. +Should be odd. Recommended value 7 pixels +@param searchWindowSize Size in pixels of the window that is used to compute weighted average for +given pixel. Should be odd. Affect performance linearly: greater searchWindowsSize - greater +denoising time. Recommended value 21 pixels +@param h Array of parameters regulating filter strength, either one +parameter applied to all channels or one per channel in dst. Big h value +perfectly removes noise but also removes image details, smaller h +value preserves details but also preserves some noise +@param normType Type of norm used for weight calculation. Can be either NORM_L2 or NORM_L1 + +This function expected to be applied to grayscale images. For colored images look at +fastNlMeansDenoisingColored. Advanced usage of this functions can be manual denoising of colored +image in different colorspaces. Such approach is used in fastNlMeansDenoisingColored by converting +image to CIELAB colorspace and then separately denoise L and AB components with different h +parameter. + */ +CV_EXPORTS_W void fastNlMeansDenoising( InputArray src, OutputArray dst, + const std::vector& h, + int templateWindowSize = 7, int searchWindowSize = 21, + int normType = NORM_L2); + +/** @brief Modification of fastNlMeansDenoising function for colored images + +@param src Input 8-bit 3-channel image. +@param dst Output image with the same size and type as src . +@param templateWindowSize Size in pixels of the template patch that is used to compute weights. +Should be odd. Recommended value 7 pixels +@param searchWindowSize Size in pixels of the window that is used to compute weighted average for +given pixel. Should be odd. Affect performance linearly: greater searchWindowsSize - greater +denoising time. Recommended value 21 pixels +@param h Parameter regulating filter strength for luminance component. Bigger h value perfectly +removes noise but also removes image details, smaller h value preserves details but also preserves +some noise +@param hColor The same as h but for color components. For most images value equals 10 +will be enough to remove colored noise and do not distort colors + +The function converts image to CIELAB colorspace and then separately denoise L and AB components +with given h parameters using fastNlMeansDenoising function. + */ +CV_EXPORTS_W void fastNlMeansDenoisingColored( InputArray src, OutputArray dst, + float h = 3, float hColor = 3, + int templateWindowSize = 7, int searchWindowSize = 21); + +/** @brief Modification of fastNlMeansDenoising function for images sequence where consecutive images have been +captured in small period of time. For example video. This version of the function is for grayscale +images or for manual manipulation with colorspaces. For more details see + + +@param srcImgs Input 8-bit 1-channel, 2-channel, 3-channel or +4-channel images sequence. All images should have the same type and +size. +@param imgToDenoiseIndex Target image to denoise index in srcImgs sequence +@param temporalWindowSize Number of surrounding images to use for target image denoising. Should +be odd. Images from imgToDenoiseIndex - temporalWindowSize / 2 to +imgToDenoiseIndex - temporalWindowSize / 2 from srcImgs will be used to denoise +srcImgs[imgToDenoiseIndex] image. +@param dst Output image with the same size and type as srcImgs images. +@param templateWindowSize Size in pixels of the template patch that is used to compute weights. +Should be odd. Recommended value 7 pixels +@param searchWindowSize Size in pixels of the window that is used to compute weighted average for +given pixel. Should be odd. Affect performance linearly: greater searchWindowsSize - greater +denoising time. Recommended value 21 pixels +@param h Parameter regulating filter strength. Bigger h value +perfectly removes noise but also removes image details, smaller h +value preserves details but also preserves some noise + */ +CV_EXPORTS_W void fastNlMeansDenoisingMulti( InputArrayOfArrays srcImgs, OutputArray dst, + int imgToDenoiseIndex, int temporalWindowSize, + float h = 3, int templateWindowSize = 7, int searchWindowSize = 21); + +/** @brief Modification of fastNlMeansDenoising function for images sequence where consecutive images have been +captured in small period of time. For example video. This version of the function is for grayscale +images or for manual manipulation with colorspaces. For more details see + + +@param srcImgs Input 8-bit or 16-bit (only with NORM_L1) 1-channel, +2-channel, 3-channel or 4-channel images sequence. All images should +have the same type and size. +@param imgToDenoiseIndex Target image to denoise index in srcImgs sequence +@param temporalWindowSize Number of surrounding images to use for target image denoising. Should +be odd. Images from imgToDenoiseIndex - temporalWindowSize / 2 to +imgToDenoiseIndex - temporalWindowSize / 2 from srcImgs will be used to denoise +srcImgs[imgToDenoiseIndex] image. +@param dst Output image with the same size and type as srcImgs images. +@param templateWindowSize Size in pixels of the template patch that is used to compute weights. +Should be odd. Recommended value 7 pixels +@param searchWindowSize Size in pixels of the window that is used to compute weighted average for +given pixel. Should be odd. Affect performance linearly: greater searchWindowsSize - greater +denoising time. Recommended value 21 pixels +@param h Array of parameters regulating filter strength, either one +parameter applied to all channels or one per channel in dst. Big h value +perfectly removes noise but also removes image details, smaller h +value preserves details but also preserves some noise +@param normType Type of norm used for weight calculation. Can be either NORM_L2 or NORM_L1 + */ +CV_EXPORTS_W void fastNlMeansDenoisingMulti( InputArrayOfArrays srcImgs, OutputArray dst, + int imgToDenoiseIndex, int temporalWindowSize, + const std::vector& h, + int templateWindowSize = 7, int searchWindowSize = 21, + int normType = NORM_L2); + +/** @brief Modification of fastNlMeansDenoisingMulti function for colored images sequences + +@param srcImgs Input 8-bit 3-channel images sequence. All images should have the same type and +size. +@param imgToDenoiseIndex Target image to denoise index in srcImgs sequence +@param temporalWindowSize Number of surrounding images to use for target image denoising. Should +be odd. Images from imgToDenoiseIndex - temporalWindowSize / 2 to +imgToDenoiseIndex - temporalWindowSize / 2 from srcImgs will be used to denoise +srcImgs[imgToDenoiseIndex] image. +@param dst Output image with the same size and type as srcImgs images. +@param templateWindowSize Size in pixels of the template patch that is used to compute weights. +Should be odd. Recommended value 7 pixels +@param searchWindowSize Size in pixels of the window that is used to compute weighted average for +given pixel. Should be odd. Affect performance linearly: greater searchWindowsSize - greater +denoising time. Recommended value 21 pixels +@param h Parameter regulating filter strength for luminance component. Bigger h value perfectly +removes noise but also removes image details, smaller h value preserves details but also preserves +some noise. +@param hColor The same as h but for color components. + +The function converts images to CIELAB colorspace and then separately denoise L and AB components +with given h parameters using fastNlMeansDenoisingMulti function. + */ +CV_EXPORTS_W void fastNlMeansDenoisingColoredMulti( InputArrayOfArrays srcImgs, OutputArray dst, + int imgToDenoiseIndex, int temporalWindowSize, + float h = 3, float hColor = 3, + int templateWindowSize = 7, int searchWindowSize = 21); + +/** @brief Primal-dual algorithm is an algorithm for solving special types of variational problems (that is, +finding a function to minimize some functional). As the image denoising, in particular, may be seen +as the variational problem, primal-dual algorithm then can be used to perform denoising and this is +exactly what is implemented. + +It should be noted, that this implementation was taken from the July 2013 blog entry +@cite MA13 , which also contained (slightly more general) ready-to-use source code on Python. +Subsequently, that code was rewritten on C++ with the usage of openCV by Vadim Pisarevsky at the end +of July 2013 and finally it was slightly adapted by later authors. + +Although the thorough discussion and justification of the algorithm involved may be found in +@cite ChambolleEtAl, it might make sense to skim over it here, following @cite MA13 . To begin +with, we consider the 1-byte gray-level images as the functions from the rectangular domain of +pixels (it may be seen as set +\f$\left\{(x,y)\in\mathbb{N}\times\mathbb{N}\mid 1\leq x\leq n,\;1\leq y\leq m\right\}\f$ for some +\f$m,\;n\in\mathbb{N}\f$) into \f$\{0,1,\dots,255\}\f$. We shall denote the noised images as \f$f_i\f$ and with +this view, given some image \f$x\f$ of the same size, we may measure how bad it is by the formula + +\f[\left\|\left\|\nabla x\right\|\right\| + \lambda\sum_i\left\|\left\|x-f_i\right\|\right\|\f] + +\f$\|\|\cdot\|\|\f$ here denotes \f$L_2\f$-norm and as you see, the first addend states that we want our +image to be smooth (ideally, having zero gradient, thus being constant) and the second states that +we want our result to be close to the observations we've got. If we treat \f$x\f$ as a function, this is +exactly the functional what we seek to minimize and here the Primal-Dual algorithm comes into play. + +@param observations This array should contain one or more noised versions of the image that is to +be restored. +@param result Here the denoised image will be stored. There is no need to do pre-allocation of +storage space, as it will be automatically allocated, if necessary. +@param lambda Corresponds to \f$\lambda\f$ in the formulas above. As it is enlarged, the smooth +(blurred) images are treated more favorably than detailed (but maybe more noised) ones. Roughly +speaking, as it becomes smaller, the result will be more blur but more sever outliers will be +removed. +@param niters Number of iterations that the algorithm will run. Of course, as more iterations as +better, but it is hard to quantitatively refine this statement, so just use the default and +increase it if the results are poor. + */ +CV_EXPORTS_W void denoise_TVL1(const std::vector& observations,Mat& result, double lambda=1.0, int niters=30); + +//! @} photo_denoise + +//! @addtogroup photo_hdr +//! @{ + +enum { LDR_SIZE = 256 }; + +/** @brief Base class for tonemapping algorithms - tools that are used to map HDR image to 8-bit range. + */ +class CV_EXPORTS_W Tonemap : public Algorithm +{ +public: + /** @brief Tonemaps image + + @param src source image - CV_32FC3 Mat (float 32 bits 3 channels) + @param dst destination image - CV_32FC3 Mat with values in [0, 1] range + */ + CV_WRAP virtual void process(InputArray src, OutputArray dst) = 0; + + CV_WRAP virtual float getGamma() const = 0; + CV_WRAP virtual void setGamma(float gamma) = 0; +}; + +/** @brief Creates simple linear mapper with gamma correction + +@param gamma positive value for gamma correction. Gamma value of 1.0 implies no correction, gamma +equal to 2.2f is suitable for most displays. +Generally gamma \> 1 brightens the image and gamma \< 1 darkens it. + */ +CV_EXPORTS_W Ptr createTonemap(float gamma = 1.0f); + +/** @brief Adaptive logarithmic mapping is a fast global tonemapping algorithm that scales the image in +logarithmic domain. + +Since it's a global operator the same function is applied to all the pixels, it is controlled by the +bias parameter. + +Optional saturation enhancement is possible as described in @cite FL02 . + +For more information see @cite DM03 . + */ +class CV_EXPORTS_W TonemapDrago : public Tonemap +{ +public: + + CV_WRAP virtual float getSaturation() const = 0; + CV_WRAP virtual void setSaturation(float saturation) = 0; + + CV_WRAP virtual float getBias() const = 0; + CV_WRAP virtual void setBias(float bias) = 0; +}; + +/** @brief Creates TonemapDrago object + +@param gamma gamma value for gamma correction. See createTonemap +@param saturation positive saturation enhancement value. 1.0 preserves saturation, values greater +than 1 increase saturation and values less than 1 decrease it. +@param bias value for bias function in [0, 1] range. Values from 0.7 to 0.9 usually give best +results, default value is 0.85. + */ +CV_EXPORTS_W Ptr createTonemapDrago(float gamma = 1.0f, float saturation = 1.0f, float bias = 0.85f); + + +/** @brief This is a global tonemapping operator that models human visual system. + +Mapping function is controlled by adaptation parameter, that is computed using light adaptation and +color adaptation. + +For more information see @cite RD05 . + */ +class CV_EXPORTS_W TonemapReinhard : public Tonemap +{ +public: + CV_WRAP virtual float getIntensity() const = 0; + CV_WRAP virtual void setIntensity(float intensity) = 0; + + CV_WRAP virtual float getLightAdaptation() const = 0; + CV_WRAP virtual void setLightAdaptation(float light_adapt) = 0; + + CV_WRAP virtual float getColorAdaptation() const = 0; + CV_WRAP virtual void setColorAdaptation(float color_adapt) = 0; +}; + +/** @brief Creates TonemapReinhard object + +@param gamma gamma value for gamma correction. See createTonemap +@param intensity result intensity in [-8, 8] range. Greater intensity produces brighter results. +@param light_adapt light adaptation in [0, 1] range. If 1 adaptation is based only on pixel +value, if 0 it's global, otherwise it's a weighted mean of this two cases. +@param color_adapt chromatic adaptation in [0, 1] range. If 1 channels are treated independently, +if 0 adaptation level is the same for each channel. + */ +CV_EXPORTS_W Ptr +createTonemapReinhard(float gamma = 1.0f, float intensity = 0.0f, float light_adapt = 1.0f, float color_adapt = 0.0f); + +/** @brief This algorithm transforms image to contrast using gradients on all levels of gaussian pyramid, +transforms contrast values to HVS response and scales the response. After this the image is +reconstructed from new contrast values. + +For more information see @cite MM06 . + */ +class CV_EXPORTS_W TonemapMantiuk : public Tonemap +{ +public: + CV_WRAP virtual float getScale() const = 0; + CV_WRAP virtual void setScale(float scale) = 0; + + CV_WRAP virtual float getSaturation() const = 0; + CV_WRAP virtual void setSaturation(float saturation) = 0; +}; + +/** @brief Creates TonemapMantiuk object + +@param gamma gamma value for gamma correction. See createTonemap +@param scale contrast scale factor. HVS response is multiplied by this parameter, thus compressing +dynamic range. Values from 0.6 to 0.9 produce best results. +@param saturation saturation enhancement value. See createTonemapDrago + */ +CV_EXPORTS_W Ptr +createTonemapMantiuk(float gamma = 1.0f, float scale = 0.7f, float saturation = 1.0f); + +/** @brief The base class for algorithms that align images of the same scene with different exposures + */ +class CV_EXPORTS_W AlignExposures : public Algorithm +{ +public: + /** @brief Aligns images + + @param src vector of input images + @param dst vector of aligned images + @param times vector of exposure time values for each image + @param response 256x1 matrix with inverse camera response function for each pixel value, it should + have the same number of channels as images. + */ + CV_WRAP virtual void process(InputArrayOfArrays src, std::vector& dst, + InputArray times, InputArray response) = 0; +}; + +/** @brief This algorithm converts images to median threshold bitmaps (1 for pixels brighter than median +luminance and 0 otherwise) and than aligns the resulting bitmaps using bit operations. + +It is invariant to exposure, so exposure values and camera response are not necessary. + +In this implementation new image regions are filled with zeros. + +For more information see @cite GW03 . + */ +class CV_EXPORTS_W AlignMTB : public AlignExposures +{ +public: + CV_WRAP virtual void process(InputArrayOfArrays src, std::vector& dst, + InputArray times, InputArray response) CV_OVERRIDE = 0; + + /** @brief Short version of process, that doesn't take extra arguments. + + @param src vector of input images + @param dst vector of aligned images + */ + CV_WRAP virtual void process(InputArrayOfArrays src, std::vector& dst) = 0; + + /** @brief Calculates shift between two images, i. e. how to shift the second image to correspond it with the + first. + + @param img0 first image + @param img1 second image + */ + CV_WRAP virtual Point calculateShift(InputArray img0, InputArray img1) = 0; + /** @brief Helper function, that shift Mat filling new regions with zeros. + + @param src input image + @param dst result image + @param shift shift value + */ + CV_WRAP virtual void shiftMat(InputArray src, OutputArray dst, const Point shift) = 0; + /** @brief Computes median threshold and exclude bitmaps of given image. + + @param img input image + @param tb median threshold bitmap + @param eb exclude bitmap + */ + CV_WRAP virtual void computeBitmaps(InputArray img, OutputArray tb, OutputArray eb) = 0; + + CV_WRAP virtual int getMaxBits() const = 0; + CV_WRAP virtual void setMaxBits(int max_bits) = 0; + + CV_WRAP virtual int getExcludeRange() const = 0; + CV_WRAP virtual void setExcludeRange(int exclude_range) = 0; + + CV_WRAP virtual bool getCut() const = 0; + CV_WRAP virtual void setCut(bool value) = 0; +}; + +/** @brief Creates AlignMTB object + +@param max_bits logarithm to the base 2 of maximal shift in each dimension. Values of 5 and 6 are +usually good enough (31 and 63 pixels shift respectively). +@param exclude_range range for exclusion bitmap that is constructed to suppress noise around the +median value. +@param cut if true cuts images, otherwise fills the new regions with zeros. + */ +CV_EXPORTS_W Ptr createAlignMTB(int max_bits = 6, int exclude_range = 4, bool cut = true); + +/** @brief The base class for camera response calibration algorithms. + */ +class CV_EXPORTS_W CalibrateCRF : public Algorithm +{ +public: + /** @brief Recovers inverse camera response. + + @param src vector of input images + @param dst 256x1 matrix with inverse camera response function + @param times vector of exposure time values for each image + */ + CV_WRAP virtual void process(InputArrayOfArrays src, OutputArray dst, InputArray times) = 0; +}; + +/** @brief Inverse camera response function is extracted for each brightness value by minimizing an objective +function as linear system. Objective function is constructed using pixel values on the same position +in all images, extra term is added to make the result smoother. + +For more information see @cite DM97 . + */ +class CV_EXPORTS_W CalibrateDebevec : public CalibrateCRF +{ +public: + CV_WRAP virtual float getLambda() const = 0; + CV_WRAP virtual void setLambda(float lambda) = 0; + + CV_WRAP virtual int getSamples() const = 0; + CV_WRAP virtual void setSamples(int samples) = 0; + + CV_WRAP virtual bool getRandom() const = 0; + CV_WRAP virtual void setRandom(bool random) = 0; +}; + +/** @brief Creates CalibrateDebevec object + +@param samples number of pixel locations to use +@param lambda smoothness term weight. Greater values produce smoother results, but can alter the +response. +@param random if true sample pixel locations are chosen at random, otherwise they form a +rectangular grid. + */ +CV_EXPORTS_W Ptr createCalibrateDebevec(int samples = 70, float lambda = 10.0f, bool random = false); + +/** @brief Inverse camera response function is extracted for each brightness value by minimizing an objective +function as linear system. This algorithm uses all image pixels. + +For more information see @cite RB99 . + */ +class CV_EXPORTS_W CalibrateRobertson : public CalibrateCRF +{ +public: + CV_WRAP virtual int getMaxIter() const = 0; + CV_WRAP virtual void setMaxIter(int max_iter) = 0; + + CV_WRAP virtual float getThreshold() const = 0; + CV_WRAP virtual void setThreshold(float threshold) = 0; + + CV_WRAP virtual Mat getRadiance() const = 0; +}; + +/** @brief Creates CalibrateRobertson object + +@param max_iter maximal number of Gauss-Seidel solver iterations. +@param threshold target difference between results of two successive steps of the minimization. + */ +CV_EXPORTS_W Ptr createCalibrateRobertson(int max_iter = 30, float threshold = 0.01f); + +/** @brief The base class algorithms that can merge exposure sequence to a single image. + */ +class CV_EXPORTS_W MergeExposures : public Algorithm +{ +public: + /** @brief Merges images. + + @param src vector of input images + @param dst result image + @param times vector of exposure time values for each image + @param response 256x1 matrix with inverse camera response function for each pixel value, it should + have the same number of channels as images. + */ + CV_WRAP virtual void process(InputArrayOfArrays src, OutputArray dst, + InputArray times, InputArray response) = 0; +}; + +/** @brief The resulting HDR image is calculated as weighted average of the exposures considering exposure +values and camera response. + +For more information see @cite DM97 . + */ +class CV_EXPORTS_W MergeDebevec : public MergeExposures +{ +public: + CV_WRAP virtual void process(InputArrayOfArrays src, OutputArray dst, + InputArray times, InputArray response) CV_OVERRIDE = 0; + CV_WRAP virtual void process(InputArrayOfArrays src, OutputArray dst, InputArray times) = 0; +}; + +/** @brief Creates MergeDebevec object + */ +CV_EXPORTS_W Ptr createMergeDebevec(); + +/** @brief Pixels are weighted using contrast, saturation and well-exposedness measures, than images are +combined using laplacian pyramids. + +The resulting image weight is constructed as weighted average of contrast, saturation and +well-exposedness measures. + +The resulting image doesn't require tonemapping and can be converted to 8-bit image by multiplying +by 255, but it's recommended to apply gamma correction and/or linear tonemapping. + +For more information see @cite MK07 . + */ +class CV_EXPORTS_W MergeMertens : public MergeExposures +{ +public: + CV_WRAP virtual void process(InputArrayOfArrays src, OutputArray dst, + InputArray times, InputArray response) CV_OVERRIDE = 0; + /** @brief Short version of process, that doesn't take extra arguments. + + @param src vector of input images + @param dst result image + */ + CV_WRAP virtual void process(InputArrayOfArrays src, OutputArray dst) = 0; + + CV_WRAP virtual float getContrastWeight() const = 0; + CV_WRAP virtual void setContrastWeight(float contrast_weiht) = 0; + + CV_WRAP virtual float getSaturationWeight() const = 0; + CV_WRAP virtual void setSaturationWeight(float saturation_weight) = 0; + + CV_WRAP virtual float getExposureWeight() const = 0; + CV_WRAP virtual void setExposureWeight(float exposure_weight) = 0; +}; + +/** @brief Creates MergeMertens object + +@param contrast_weight contrast measure weight. See MergeMertens. +@param saturation_weight saturation measure weight +@param exposure_weight well-exposedness measure weight + */ +CV_EXPORTS_W Ptr +createMergeMertens(float contrast_weight = 1.0f, float saturation_weight = 1.0f, float exposure_weight = 0.0f); + +/** @brief The resulting HDR image is calculated as weighted average of the exposures considering exposure +values and camera response. + +For more information see @cite RB99 . + */ +class CV_EXPORTS_W MergeRobertson : public MergeExposures +{ +public: + CV_WRAP virtual void process(InputArrayOfArrays src, OutputArray dst, + InputArray times, InputArray response) CV_OVERRIDE = 0; + CV_WRAP virtual void process(InputArrayOfArrays src, OutputArray dst, InputArray times) = 0; +}; + +/** @brief Creates MergeRobertson object + */ +CV_EXPORTS_W Ptr createMergeRobertson(); + +//! @} photo_hdr + +//! @addtogroup photo_decolor +//! @{ + +/** @brief Transforms a color image to a grayscale image. It is a basic tool in digital printing, stylized +black-and-white photograph rendering, and in many single channel image processing applications +@cite CL12 . + +@param src Input 8-bit 3-channel image. +@param grayscale Output 8-bit 1-channel image. +@param color_boost Output 8-bit 3-channel image. + +This function is to be applied on color images. + */ +CV_EXPORTS_W void decolor( InputArray src, OutputArray grayscale, OutputArray color_boost); + +//! @} photo_decolor + +//! @addtogroup photo_clone +//! @{ + + +//! seamlessClone algorithm flags +enum +{ + /** The power of the method is fully expressed when inserting objects with complex outlines into a new background*/ + NORMAL_CLONE = 1, + /** The classic method, color-based selection and alpha masking might be time consuming and often leaves an undesirable + halo. Seamless cloning, even averaged with the original image, is not effective. Mixed seamless cloning based on a loose selection proves effective.*/ + MIXED_CLONE = 2, + /** Monochrome transfer allows the user to easily replace certain features of one object by alternative features.*/ + MONOCHROME_TRANSFER = 3}; + + +/** @example samples/cpp/tutorial_code/photo/seamless_cloning/cloning_demo.cpp +An example using seamlessClone function +*/ +/** @brief Image editing tasks concern either global changes (color/intensity corrections, filters, +deformations) or local changes concerned to a selection. Here we are interested in achieving local +changes, ones that are restricted to a region manually selected (ROI), in a seamless and effortless +manner. The extent of the changes ranges from slight distortions to complete replacement by novel +content @cite PM03 . + +@param src Input 8-bit 3-channel image. +@param dst Input 8-bit 3-channel image. +@param mask Input 8-bit 1 or 3-channel image. +@param p Point in dst image where object is placed. +@param blend Output image with the same size and type as dst. +@param flags Cloning method that could be cv::NORMAL_CLONE, cv::MIXED_CLONE or cv::MONOCHROME_TRANSFER + */ +CV_EXPORTS_W void seamlessClone( InputArray src, InputArray dst, InputArray mask, Point p, + OutputArray blend, int flags); + +/** @brief Given an original color image, two differently colored versions of this image can be mixed +seamlessly. + +@param src Input 8-bit 3-channel image. +@param mask Input 8-bit 1 or 3-channel image. +@param dst Output image with the same size and type as src . +@param red_mul R-channel multiply factor. +@param green_mul G-channel multiply factor. +@param blue_mul B-channel multiply factor. + +Multiplication factor is between .5 to 2.5. + */ +CV_EXPORTS_W void colorChange(InputArray src, InputArray mask, OutputArray dst, float red_mul = 1.0f, + float green_mul = 1.0f, float blue_mul = 1.0f); + +/** @brief Applying an appropriate non-linear transformation to the gradient field inside the selection and +then integrating back with a Poisson solver, modifies locally the apparent illumination of an image. + +@param src Input 8-bit 3-channel image. +@param mask Input 8-bit 1 or 3-channel image. +@param dst Output image with the same size and type as src. +@param alpha Value ranges between 0-2. +@param beta Value ranges between 0-2. + +This is useful to highlight under-exposed foreground objects or to reduce specular reflections. + */ +CV_EXPORTS_W void illuminationChange(InputArray src, InputArray mask, OutputArray dst, + float alpha = 0.2f, float beta = 0.4f); + +/** @brief By retaining only the gradients at edge locations, before integrating with the Poisson solver, one +washes out the texture of the selected region, giving its contents a flat aspect. Here Canny Edge %Detector is used. + +@param src Input 8-bit 3-channel image. +@param mask Input 8-bit 1 or 3-channel image. +@param dst Output image with the same size and type as src. +@param low_threshold %Range from 0 to 100. +@param high_threshold Value \> 100. +@param kernel_size The size of the Sobel kernel to be used. + +@note +The algorithm assumes that the color of the source image is close to that of the destination. This +assumption means that when the colors don't match, the source image color gets tinted toward the +color of the destination image. + */ +CV_EXPORTS_W void textureFlattening(InputArray src, InputArray mask, OutputArray dst, + float low_threshold = 30, float high_threshold = 45, + int kernel_size = 3); + +//! @} photo_clone + +//! @addtogroup photo_render +//! @{ + +//! Edge preserving filters +enum +{ + RECURS_FILTER = 1, //!< Recursive Filtering + NORMCONV_FILTER = 2 //!< Normalized Convolution Filtering +}; + +/** @brief Filtering is the fundamental operation in image and video processing. Edge-preserving smoothing +filters are used in many different applications @cite EM11 . + +@param src Input 8-bit 3-channel image. +@param dst Output 8-bit 3-channel image. +@param flags Edge preserving filters: cv::RECURS_FILTER or cv::NORMCONV_FILTER +@param sigma_s %Range between 0 to 200. +@param sigma_r %Range between 0 to 1. + */ +CV_EXPORTS_W void edgePreservingFilter(InputArray src, OutputArray dst, int flags = 1, + float sigma_s = 60, float sigma_r = 0.4f); + +/** @brief This filter enhances the details of a particular image. + +@param src Input 8-bit 3-channel image. +@param dst Output image with the same size and type as src. +@param sigma_s %Range between 0 to 200. +@param sigma_r %Range between 0 to 1. + */ +CV_EXPORTS_W void detailEnhance(InputArray src, OutputArray dst, float sigma_s = 10, + float sigma_r = 0.15f); + +/** @example samples/cpp/tutorial_code/photo/non_photorealistic_rendering/npr_demo.cpp +An example using non-photorealistic line drawing functions +*/ +/** @brief Pencil-like non-photorealistic line drawing + +@param src Input 8-bit 3-channel image. +@param dst1 Output 8-bit 1-channel image. +@param dst2 Output image with the same size and type as src. +@param sigma_s %Range between 0 to 200. +@param sigma_r %Range between 0 to 1. +@param shade_factor %Range between 0 to 0.1. + */ +CV_EXPORTS_W void pencilSketch(InputArray src, OutputArray dst1, OutputArray dst2, + float sigma_s = 60, float sigma_r = 0.07f, float shade_factor = 0.02f); + +/** @brief Stylization aims to produce digital imagery with a wide variety of effects not focused on +photorealism. Edge-aware filters are ideal for stylization, as they can abstract regions of low +contrast while preserving, or enhancing, high-contrast features. + +@param src Input 8-bit 3-channel image. +@param dst Output image with the same size and type as src. +@param sigma_s %Range between 0 to 200. +@param sigma_r %Range between 0 to 1. + */ +CV_EXPORTS_W void stylization(InputArray src, OutputArray dst, float sigma_s = 60, + float sigma_r = 0.45f); + +//! @} photo_render + +//! @} photo + +} // cv + +#endif diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/photo/cuda.hpp b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/photo/cuda.hpp new file mode 100644 index 0000000..a2f3816 --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/photo/cuda.hpp @@ -0,0 +1,132 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2008-2012, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef OPENCV_PHOTO_CUDA_HPP +#define OPENCV_PHOTO_CUDA_HPP + +#include "opencv2/core/cuda.hpp" + +namespace cv { namespace cuda { + +//! @addtogroup photo_denoise +//! @{ + +/** @brief Performs pure non local means denoising without any simplification, and thus it is not fast. + +@param src Source image. Supports only CV_8UC1, CV_8UC2 and CV_8UC3. +@param dst Destination image. +@param h Filter sigma regulating filter strength for color. +@param search_window Size of search window. +@param block_size Size of block used for computing weights. +@param borderMode Border type. See borderInterpolate for details. BORDER_REFLECT101 , +BORDER_REPLICATE , BORDER_CONSTANT , BORDER_REFLECT and BORDER_WRAP are supported for now. +@param stream Stream for the asynchronous version. + +@sa + fastNlMeansDenoising + */ +CV_EXPORTS void nonLocalMeans(InputArray src, OutputArray dst, + float h, + int search_window = 21, + int block_size = 7, + int borderMode = BORDER_DEFAULT, + Stream& stream = Stream::Null()); + +/** @brief Perform image denoising using Non-local Means Denoising algorithm + with several computational +optimizations. Noise expected to be a gaussian white noise + +@param src Input 8-bit 1-channel, 2-channel or 3-channel image. +@param dst Output image with the same size and type as src . +@param h Parameter regulating filter strength. Big h value perfectly removes noise but also +removes image details, smaller h value preserves details but also preserves some noise +@param search_window Size in pixels of the window that is used to compute weighted average for +given pixel. Should be odd. Affect performance linearly: greater search_window - greater +denoising time. Recommended value 21 pixels +@param block_size Size in pixels of the template patch that is used to compute weights. Should be +odd. Recommended value 7 pixels +@param stream Stream for the asynchronous invocations. + +This function expected to be applied to grayscale images. For colored images look at +FastNonLocalMeansDenoising::labMethod. + +@sa + fastNlMeansDenoising + */ +CV_EXPORTS void fastNlMeansDenoising(InputArray src, OutputArray dst, + float h, + int search_window = 21, + int block_size = 7, + Stream& stream = Stream::Null()); + +/** @brief Modification of fastNlMeansDenoising function for colored images + +@param src Input 8-bit 3-channel image. +@param dst Output image with the same size and type as src . +@param h_luminance Parameter regulating filter strength. Big h value perfectly removes noise but +also removes image details, smaller h value preserves details but also preserves some noise +@param photo_render float The same as h but for color components. For most images value equals 10 will be +enough to remove colored noise and do not distort colors +@param search_window Size in pixels of the window that is used to compute weighted average for +given pixel. Should be odd. Affect performance linearly: greater search_window - greater +denoising time. Recommended value 21 pixels +@param block_size Size in pixels of the template patch that is used to compute weights. Should be +odd. Recommended value 7 pixels +@param stream Stream for the asynchronous invocations. + +The function converts image to CIELAB colorspace and then separately denoise L and AB components +with given h parameters using FastNonLocalMeansDenoising::simpleMethod function. + +@sa + fastNlMeansDenoisingColored + */ +CV_EXPORTS void fastNlMeansDenoisingColored(InputArray src, OutputArray dst, + float h_luminance, float photo_render, + int search_window = 21, + int block_size = 7, + Stream& stream = Stream::Null()); + +//! @} photo + +}} // namespace cv { namespace cuda { + +#endif /* OPENCV_PHOTO_CUDA_HPP */ diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/photo/legacy/constants_c.h b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/photo/legacy/constants_c.h new file mode 100644 index 0000000..ec1d440 --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/photo/legacy/constants_c.h @@ -0,0 +1,14 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#ifndef OPENCV_PHOTO_LEGACY_CONSTANTS_H +#define OPENCV_PHOTO_LEGACY_CONSTANTS_H + +enum InpaintingModes +{ + CV_INPAINT_NS =0, + CV_INPAINT_TELEA =1 +}; + +#endif // OPENCV_PHOTO_LEGACY_CONSTANTS_H diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/photo/photo.hpp b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/photo/photo.hpp new file mode 100644 index 0000000..8af5e9f --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/photo/photo.hpp @@ -0,0 +1,48 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Copyright (C) 2013, OpenCV Foundation, all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifdef __OPENCV_BUILD +#error this is a compatibility header which should not be used inside the OpenCV library +#endif + +#include "opencv2/photo.hpp" diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/video.hpp b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/video.hpp new file mode 100644 index 0000000..a3dde60 --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/video.hpp @@ -0,0 +1,59 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Copyright (C) 2013, OpenCV Foundation, all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef OPENCV_VIDEO_HPP +#define OPENCV_VIDEO_HPP + +/** + @defgroup video Video Analysis + @{ + @defgroup video_motion Motion Analysis + @defgroup video_track Object Tracking + @defgroup video_c C API + @} +*/ + +#include "opencv2/video/tracking.hpp" +#include "opencv2/video/background_segm.hpp" + +#endif //OPENCV_VIDEO_HPP diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/video/background_segm.hpp b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/video/background_segm.hpp new file mode 100644 index 0000000..e1dfa15 --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/video/background_segm.hpp @@ -0,0 +1,317 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Copyright (C) 2013, OpenCV Foundation, all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef OPENCV_BACKGROUND_SEGM_HPP +#define OPENCV_BACKGROUND_SEGM_HPP + +#include "opencv2/core.hpp" + +namespace cv +{ + +//! @addtogroup video_motion +//! @{ + +/** @brief Base class for background/foreground segmentation. : + +The class is only used to define the common interface for the whole family of background/foreground +segmentation algorithms. + */ +class CV_EXPORTS_W BackgroundSubtractor : public Algorithm +{ +public: + /** @brief Computes a foreground mask. + + @param image Next video frame. + @param fgmask The output foreground mask as an 8-bit binary image. + @param learningRate The value between 0 and 1 that indicates how fast the background model is + learnt. Negative parameter value makes the algorithm to use some automatically chosen learning + rate. 0 means that the background model is not updated at all, 1 means that the background model + is completely reinitialized from the last frame. + */ + CV_WRAP virtual void apply(InputArray image, OutputArray fgmask, double learningRate=-1) = 0; + + /** @brief Computes a background image. + + @param backgroundImage The output background image. + + @note Sometimes the background image can be very blurry, as it contain the average background + statistics. + */ + CV_WRAP virtual void getBackgroundImage(OutputArray backgroundImage) const = 0; +}; + + +/** @brief Gaussian Mixture-based Background/Foreground Segmentation Algorithm. + +The class implements the Gaussian mixture model background subtraction described in @cite Zivkovic2004 +and @cite Zivkovic2006 . + */ +class CV_EXPORTS_W BackgroundSubtractorMOG2 : public BackgroundSubtractor +{ +public: + /** @brief Returns the number of last frames that affect the background model + */ + CV_WRAP virtual int getHistory() const = 0; + /** @brief Sets the number of last frames that affect the background model + */ + CV_WRAP virtual void setHistory(int history) = 0; + + /** @brief Returns the number of gaussian components in the background model + */ + CV_WRAP virtual int getNMixtures() const = 0; + /** @brief Sets the number of gaussian components in the background model. + + The model needs to be reinitalized to reserve memory. + */ + CV_WRAP virtual void setNMixtures(int nmixtures) = 0;//needs reinitialization! + + /** @brief Returns the "background ratio" parameter of the algorithm + + If a foreground pixel keeps semi-constant value for about backgroundRatio\*history frames, it's + considered background and added to the model as a center of a new component. It corresponds to TB + parameter in the paper. + */ + CV_WRAP virtual double getBackgroundRatio() const = 0; + /** @brief Sets the "background ratio" parameter of the algorithm + */ + CV_WRAP virtual void setBackgroundRatio(double ratio) = 0; + + /** @brief Returns the variance threshold for the pixel-model match + + The main threshold on the squared Mahalanobis distance to decide if the sample is well described by + the background model or not. Related to Cthr from the paper. + */ + CV_WRAP virtual double getVarThreshold() const = 0; + /** @brief Sets the variance threshold for the pixel-model match + */ + CV_WRAP virtual void setVarThreshold(double varThreshold) = 0; + + /** @brief Returns the variance threshold for the pixel-model match used for new mixture component generation + + Threshold for the squared Mahalanobis distance that helps decide when a sample is close to the + existing components (corresponds to Tg in the paper). If a pixel is not close to any component, it + is considered foreground or added as a new component. 3 sigma =\> Tg=3\*3=9 is default. A smaller Tg + value generates more components. A higher Tg value may result in a small number of components but + they can grow too large. + */ + CV_WRAP virtual double getVarThresholdGen() const = 0; + /** @brief Sets the variance threshold for the pixel-model match used for new mixture component generation + */ + CV_WRAP virtual void setVarThresholdGen(double varThresholdGen) = 0; + + /** @brief Returns the initial variance of each gaussian component + */ + CV_WRAP virtual double getVarInit() const = 0; + /** @brief Sets the initial variance of each gaussian component + */ + CV_WRAP virtual void setVarInit(double varInit) = 0; + + CV_WRAP virtual double getVarMin() const = 0; + CV_WRAP virtual void setVarMin(double varMin) = 0; + + CV_WRAP virtual double getVarMax() const = 0; + CV_WRAP virtual void setVarMax(double varMax) = 0; + + /** @brief Returns the complexity reduction threshold + + This parameter defines the number of samples needed to accept to prove the component exists. CT=0.05 + is a default value for all the samples. By setting CT=0 you get an algorithm very similar to the + standard Stauffer&Grimson algorithm. + */ + CV_WRAP virtual double getComplexityReductionThreshold() const = 0; + /** @brief Sets the complexity reduction threshold + */ + CV_WRAP virtual void setComplexityReductionThreshold(double ct) = 0; + + /** @brief Returns the shadow detection flag + + If true, the algorithm detects shadows and marks them. See createBackgroundSubtractorMOG2 for + details. + */ + CV_WRAP virtual bool getDetectShadows() const = 0; + /** @brief Enables or disables shadow detection + */ + CV_WRAP virtual void setDetectShadows(bool detectShadows) = 0; + + /** @brief Returns the shadow value + + Shadow value is the value used to mark shadows in the foreground mask. Default value is 127. Value 0 + in the mask always means background, 255 means foreground. + */ + CV_WRAP virtual int getShadowValue() const = 0; + /** @brief Sets the shadow value + */ + CV_WRAP virtual void setShadowValue(int value) = 0; + + /** @brief Returns the shadow threshold + + A shadow is detected if pixel is a darker version of the background. The shadow threshold (Tau in + the paper) is a threshold defining how much darker the shadow can be. Tau= 0.5 means that if a pixel + is more than twice darker then it is not shadow. See Prati, Mikic, Trivedi and Cucchiara, + *Detecting Moving Shadows...*, IEEE PAMI,2003. + */ + CV_WRAP virtual double getShadowThreshold() const = 0; + /** @brief Sets the shadow threshold + */ + CV_WRAP virtual void setShadowThreshold(double threshold) = 0; + + /** @brief Computes a foreground mask. + + @param image Next video frame. Floating point frame will be used without scaling and should be in range \f$[0,255]\f$. + @param fgmask The output foreground mask as an 8-bit binary image. + @param learningRate The value between 0 and 1 that indicates how fast the background model is + learnt. Negative parameter value makes the algorithm to use some automatically chosen learning + rate. 0 means that the background model is not updated at all, 1 means that the background model + is completely reinitialized from the last frame. + */ + CV_WRAP virtual void apply(InputArray image, OutputArray fgmask, double learningRate=-1) CV_OVERRIDE = 0; +}; + +/** @brief Creates MOG2 Background Subtractor + +@param history Length of the history. +@param varThreshold Threshold on the squared Mahalanobis distance between the pixel and the model +to decide whether a pixel is well described by the background model. This parameter does not +affect the background update. +@param detectShadows If true, the algorithm will detect shadows and mark them. It decreases the +speed a bit, so if you do not need this feature, set the parameter to false. + */ +CV_EXPORTS_W Ptr + createBackgroundSubtractorMOG2(int history=500, double varThreshold=16, + bool detectShadows=true); + +/** @brief K-nearest neighbours - based Background/Foreground Segmentation Algorithm. + +The class implements the K-nearest neighbours background subtraction described in @cite Zivkovic2006 . +Very efficient if number of foreground pixels is low. + */ +class CV_EXPORTS_W BackgroundSubtractorKNN : public BackgroundSubtractor +{ +public: + /** @brief Returns the number of last frames that affect the background model + */ + CV_WRAP virtual int getHistory() const = 0; + /** @brief Sets the number of last frames that affect the background model + */ + CV_WRAP virtual void setHistory(int history) = 0; + + /** @brief Returns the number of data samples in the background model + */ + CV_WRAP virtual int getNSamples() const = 0; + /** @brief Sets the number of data samples in the background model. + + The model needs to be reinitalized to reserve memory. + */ + CV_WRAP virtual void setNSamples(int _nN) = 0;//needs reinitialization! + + /** @brief Returns the threshold on the squared distance between the pixel and the sample + + The threshold on the squared distance between the pixel and the sample to decide whether a pixel is + close to a data sample. + */ + CV_WRAP virtual double getDist2Threshold() const = 0; + /** @brief Sets the threshold on the squared distance + */ + CV_WRAP virtual void setDist2Threshold(double _dist2Threshold) = 0; + + /** @brief Returns the number of neighbours, the k in the kNN. + + K is the number of samples that need to be within dist2Threshold in order to decide that that + pixel is matching the kNN background model. + */ + CV_WRAP virtual int getkNNSamples() const = 0; + /** @brief Sets the k in the kNN. How many nearest neighbours need to match. + */ + CV_WRAP virtual void setkNNSamples(int _nkNN) = 0; + + /** @brief Returns the shadow detection flag + + If true, the algorithm detects shadows and marks them. See createBackgroundSubtractorKNN for + details. + */ + CV_WRAP virtual bool getDetectShadows() const = 0; + /** @brief Enables or disables shadow detection + */ + CV_WRAP virtual void setDetectShadows(bool detectShadows) = 0; + + /** @brief Returns the shadow value + + Shadow value is the value used to mark shadows in the foreground mask. Default value is 127. Value 0 + in the mask always means background, 255 means foreground. + */ + CV_WRAP virtual int getShadowValue() const = 0; + /** @brief Sets the shadow value + */ + CV_WRAP virtual void setShadowValue(int value) = 0; + + /** @brief Returns the shadow threshold + + A shadow is detected if pixel is a darker version of the background. The shadow threshold (Tau in + the paper) is a threshold defining how much darker the shadow can be. Tau= 0.5 means that if a pixel + is more than twice darker then it is not shadow. See Prati, Mikic, Trivedi and Cucchiara, + *Detecting Moving Shadows...*, IEEE PAMI,2003. + */ + CV_WRAP virtual double getShadowThreshold() const = 0; + /** @brief Sets the shadow threshold + */ + CV_WRAP virtual void setShadowThreshold(double threshold) = 0; +}; + +/** @brief Creates KNN Background Subtractor + +@param history Length of the history. +@param dist2Threshold Threshold on the squared distance between the pixel and the sample to decide +whether a pixel is close to that sample. This parameter does not affect the background update. +@param detectShadows If true, the algorithm will detect shadows and mark them. It decreases the +speed a bit, so if you do not need this feature, set the parameter to false. + */ +CV_EXPORTS_W Ptr + createBackgroundSubtractorKNN(int history=500, double dist2Threshold=400.0, + bool detectShadows=true); + +//! @} video_motion + +} // cv + +#endif diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/video/legacy/constants_c.h b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/video/legacy/constants_c.h new file mode 100644 index 0000000..1a98f52 --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/video/legacy/constants_c.h @@ -0,0 +1,16 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#ifndef OPENCV_VIDEO_LEGACY_CONSTANTS_H +#define OPENCV_VIDEO_LEGACY_CONSTANTS_H + +enum +{ + CV_LKFLOW_PYR_A_READY = 1, + CV_LKFLOW_PYR_B_READY = 2, + CV_LKFLOW_INITIAL_GUESSES = 4, + CV_LKFLOW_GET_MIN_EIGENVALS = 8 +}; + +#endif // OPENCV_VIDEO_LEGACY_CONSTANTS_H diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/video/tracking.hpp b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/video/tracking.hpp new file mode 100644 index 0000000..b44f685 --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/video/tracking.hpp @@ -0,0 +1,827 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Copyright (C) 2013, OpenCV Foundation, all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef OPENCV_TRACKING_HPP +#define OPENCV_TRACKING_HPP + +#include "opencv2/core.hpp" +#include "opencv2/imgproc.hpp" + +namespace cv +{ + +//! @addtogroup video_track +//! @{ + +enum { OPTFLOW_USE_INITIAL_FLOW = 4, + OPTFLOW_LK_GET_MIN_EIGENVALS = 8, + OPTFLOW_FARNEBACK_GAUSSIAN = 256 + }; + +/** @brief Finds an object center, size, and orientation. + +@param probImage Back projection of the object histogram. See calcBackProject. +@param window Initial search window. +@param criteria Stop criteria for the underlying meanShift. +returns +(in old interfaces) Number of iterations CAMSHIFT took to converge +The function implements the CAMSHIFT object tracking algorithm @cite Bradski98 . First, it finds an +object center using meanShift and then adjusts the window size and finds the optimal rotation. The +function returns the rotated rectangle structure that includes the object position, size, and +orientation. The next position of the search window can be obtained with RotatedRect::boundingRect() + +See the OpenCV sample camshiftdemo.c that tracks colored objects. + +@note +- (Python) A sample explaining the camshift tracking algorithm can be found at + opencv_source_code/samples/python/camshift.py + */ +CV_EXPORTS_W RotatedRect CamShift( InputArray probImage, CV_IN_OUT Rect& window, + TermCriteria criteria ); +/** @example samples/cpp/camshiftdemo.cpp +An example using the mean-shift tracking algorithm +*/ + +/** @brief Finds an object on a back projection image. + +@param probImage Back projection of the object histogram. See calcBackProject for details. +@param window Initial search window. +@param criteria Stop criteria for the iterative search algorithm. +returns +: Number of iterations CAMSHIFT took to converge. +The function implements the iterative object search algorithm. It takes the input back projection of +an object and the initial position. The mass center in window of the back projection image is +computed and the search window center shifts to the mass center. The procedure is repeated until the +specified number of iterations criteria.maxCount is done or until the window center shifts by less +than criteria.epsilon. The algorithm is used inside CamShift and, unlike CamShift , the search +window size or orientation do not change during the search. You can simply pass the output of +calcBackProject to this function. But better results can be obtained if you pre-filter the back +projection and remove the noise. For example, you can do this by retrieving connected components +with findContours , throwing away contours with small area ( contourArea ), and rendering the +remaining contours with drawContours. + + */ +CV_EXPORTS_W int meanShift( InputArray probImage, CV_IN_OUT Rect& window, TermCriteria criteria ); + +/** @brief Constructs the image pyramid which can be passed to calcOpticalFlowPyrLK. + +@param img 8-bit input image. +@param pyramid output pyramid. +@param winSize window size of optical flow algorithm. Must be not less than winSize argument of +calcOpticalFlowPyrLK. It is needed to calculate required padding for pyramid levels. +@param maxLevel 0-based maximal pyramid level number. +@param withDerivatives set to precompute gradients for the every pyramid level. If pyramid is +constructed without the gradients then calcOpticalFlowPyrLK will calculate them internally. +@param pyrBorder the border mode for pyramid layers. +@param derivBorder the border mode for gradients. +@param tryReuseInputImage put ROI of input image into the pyramid if possible. You can pass false +to force data copying. +@return number of levels in constructed pyramid. Can be less than maxLevel. + */ +CV_EXPORTS_W int buildOpticalFlowPyramid( InputArray img, OutputArrayOfArrays pyramid, + Size winSize, int maxLevel, bool withDerivatives = true, + int pyrBorder = BORDER_REFLECT_101, + int derivBorder = BORDER_CONSTANT, + bool tryReuseInputImage = true ); + +/** @example samples/cpp/lkdemo.cpp +An example using the Lucas-Kanade optical flow algorithm +*/ + +/** @brief Calculates an optical flow for a sparse feature set using the iterative Lucas-Kanade method with +pyramids. + +@param prevImg first 8-bit input image or pyramid constructed by buildOpticalFlowPyramid. +@param nextImg second input image or pyramid of the same size and the same type as prevImg. +@param prevPts vector of 2D points for which the flow needs to be found; point coordinates must be +single-precision floating-point numbers. +@param nextPts output vector of 2D points (with single-precision floating-point coordinates) +containing the calculated new positions of input features in the second image; when +OPTFLOW_USE_INITIAL_FLOW flag is passed, the vector must have the same size as in the input. +@param status output status vector (of unsigned chars); each element of the vector is set to 1 if +the flow for the corresponding features has been found, otherwise, it is set to 0. +@param err output vector of errors; each element of the vector is set to an error for the +corresponding feature, type of the error measure can be set in flags parameter; if the flow wasn't +found then the error is not defined (use the status parameter to find such cases). +@param winSize size of the search window at each pyramid level. +@param maxLevel 0-based maximal pyramid level number; if set to 0, pyramids are not used (single +level), if set to 1, two levels are used, and so on; if pyramids are passed to input then +algorithm will use as many levels as pyramids have but no more than maxLevel. +@param criteria parameter, specifying the termination criteria of the iterative search algorithm +(after the specified maximum number of iterations criteria.maxCount or when the search window +moves by less than criteria.epsilon. +@param flags operation flags: + - **OPTFLOW_USE_INITIAL_FLOW** uses initial estimations, stored in nextPts; if the flag is + not set, then prevPts is copied to nextPts and is considered the initial estimate. + - **OPTFLOW_LK_GET_MIN_EIGENVALS** use minimum eigen values as an error measure (see + minEigThreshold description); if the flag is not set, then L1 distance between patches + around the original and a moved point, divided by number of pixels in a window, is used as a + error measure. +@param minEigThreshold the algorithm calculates the minimum eigen value of a 2x2 normal matrix of +optical flow equations (this matrix is called a spatial gradient matrix in @cite Bouguet00), divided +by number of pixels in a window; if this value is less than minEigThreshold, then a corresponding +feature is filtered out and its flow is not processed, so it allows to remove bad points and get a +performance boost. + +The function implements a sparse iterative version of the Lucas-Kanade optical flow in pyramids. See +@cite Bouguet00 . The function is parallelized with the TBB library. + +@note + +- An example using the Lucas-Kanade optical flow algorithm can be found at + opencv_source_code/samples/cpp/lkdemo.cpp +- (Python) An example using the Lucas-Kanade optical flow algorithm can be found at + opencv_source_code/samples/python/lk_track.py +- (Python) An example using the Lucas-Kanade tracker for homography matching can be found at + opencv_source_code/samples/python/lk_homography.py + */ +CV_EXPORTS_W void calcOpticalFlowPyrLK( InputArray prevImg, InputArray nextImg, + InputArray prevPts, InputOutputArray nextPts, + OutputArray status, OutputArray err, + Size winSize = Size(21,21), int maxLevel = 3, + TermCriteria criteria = TermCriteria(TermCriteria::COUNT+TermCriteria::EPS, 30, 0.01), + int flags = 0, double minEigThreshold = 1e-4 ); + +/** @brief Computes a dense optical flow using the Gunnar Farneback's algorithm. + +@param prev first 8-bit single-channel input image. +@param next second input image of the same size and the same type as prev. +@param flow computed flow image that has the same size as prev and type CV_32FC2. +@param pyr_scale parameter, specifying the image scale (\<1) to build pyramids for each image; +pyr_scale=0.5 means a classical pyramid, where each next layer is twice smaller than the previous +one. +@param levels number of pyramid layers including the initial image; levels=1 means that no extra +layers are created and only the original images are used. +@param winsize averaging window size; larger values increase the algorithm robustness to image +noise and give more chances for fast motion detection, but yield more blurred motion field. +@param iterations number of iterations the algorithm does at each pyramid level. +@param poly_n size of the pixel neighborhood used to find polynomial expansion in each pixel; +larger values mean that the image will be approximated with smoother surfaces, yielding more +robust algorithm and more blurred motion field, typically poly_n =5 or 7. +@param poly_sigma standard deviation of the Gaussian that is used to smooth derivatives used as a +basis for the polynomial expansion; for poly_n=5, you can set poly_sigma=1.1, for poly_n=7, a +good value would be poly_sigma=1.5. +@param flags operation flags that can be a combination of the following: + - **OPTFLOW_USE_INITIAL_FLOW** uses the input flow as an initial flow approximation. + - **OPTFLOW_FARNEBACK_GAUSSIAN** uses the Gaussian \f$\texttt{winsize}\times\texttt{winsize}\f$ + filter instead of a box filter of the same size for optical flow estimation; usually, this + option gives z more accurate flow than with a box filter, at the cost of lower speed; + normally, winsize for a Gaussian window should be set to a larger value to achieve the same + level of robustness. + +The function finds an optical flow for each prev pixel using the @cite Farneback2003 algorithm so that + +\f[\texttt{prev} (y,x) \sim \texttt{next} ( y + \texttt{flow} (y,x)[1], x + \texttt{flow} (y,x)[0])\f] + +@note + +- An example using the optical flow algorithm described by Gunnar Farneback can be found at + opencv_source_code/samples/cpp/fback.cpp +- (Python) An example using the optical flow algorithm described by Gunnar Farneback can be + found at opencv_source_code/samples/python/opt_flow.py + */ +CV_EXPORTS_W void calcOpticalFlowFarneback( InputArray prev, InputArray next, InputOutputArray flow, + double pyr_scale, int levels, int winsize, + int iterations, int poly_n, double poly_sigma, + int flags ); + +/** @brief Computes an optimal affine transformation between two 2D point sets. + +@param src First input 2D point set stored in std::vector or Mat, or an image stored in Mat. +@param dst Second input 2D point set of the same size and the same type as A, or another image. +@param fullAffine If true, the function finds an optimal affine transformation with no additional +restrictions (6 degrees of freedom). Otherwise, the class of transformations to choose from is +limited to combinations of translation, rotation, and uniform scaling (4 degrees of freedom). + +The function finds an optimal affine transform *[A|b]* (a 2 x 3 floating-point matrix) that +approximates best the affine transformation between: + +* Two point sets +* Two raster images. In this case, the function first finds some features in the src image and + finds the corresponding features in dst image. After that, the problem is reduced to the first + case. +In case of point sets, the problem is formulated as follows: you need to find a 2x2 matrix *A* and +2x1 vector *b* so that: + +\f[[A^*|b^*] = arg \min _{[A|b]} \sum _i \| \texttt{dst}[i] - A { \texttt{src}[i]}^T - b \| ^2\f] +where src[i] and dst[i] are the i-th points in src and dst, respectively +\f$[A|b]\f$ can be either arbitrary (when fullAffine=true ) or have a form of +\f[\begin{bmatrix} a_{11} & a_{12} & b_1 \\ -a_{12} & a_{11} & b_2 \end{bmatrix}\f] +when fullAffine=false. + +@deprecated Use cv::estimateAffine2D, cv::estimateAffinePartial2D instead. If you are using this function +with images, extract points using cv::calcOpticalFlowPyrLK and then use the estimation functions. + +@sa +estimateAffine2D, estimateAffinePartial2D, getAffineTransform, getPerspectiveTransform, findHomography + */ +CV_DEPRECATED CV_EXPORTS Mat estimateRigidTransform( InputArray src, InputArray dst, bool fullAffine ); + +enum +{ + MOTION_TRANSLATION = 0, + MOTION_EUCLIDEAN = 1, + MOTION_AFFINE = 2, + MOTION_HOMOGRAPHY = 3 +}; + +/** @brief Computes the Enhanced Correlation Coefficient value between two images @cite EP08 . + +@param templateImage single-channel template image; CV_8U or CV_32F array. +@param inputImage single-channel input image to be warped to provide an image similar to + templateImage, same type as templateImage. +@param inputMask An optional mask to indicate valid values of inputImage. + +@sa +findTransformECC + */ + +CV_EXPORTS_W double computeECC(InputArray templateImage, InputArray inputImage, InputArray inputMask = noArray()); + +/** @example samples/cpp/image_alignment.cpp +An example using the image alignment ECC algorithm +*/ + +/** @brief Finds the geometric transform (warp) between two images in terms of the ECC criterion @cite EP08 . + +@param templateImage single-channel template image; CV_8U or CV_32F array. +@param inputImage single-channel input image which should be warped with the final warpMatrix in +order to provide an image similar to templateImage, same type as templateImage. +@param warpMatrix floating-point \f$2\times 3\f$ or \f$3\times 3\f$ mapping matrix (warp). +@param motionType parameter, specifying the type of motion: + - **MOTION_TRANSLATION** sets a translational motion model; warpMatrix is \f$2\times 3\f$ with + the first \f$2\times 2\f$ part being the unity matrix and the rest two parameters being + estimated. + - **MOTION_EUCLIDEAN** sets a Euclidean (rigid) transformation as motion model; three + parameters are estimated; warpMatrix is \f$2\times 3\f$. + - **MOTION_AFFINE** sets an affine motion model (DEFAULT); six parameters are estimated; + warpMatrix is \f$2\times 3\f$. + - **MOTION_HOMOGRAPHY** sets a homography as a motion model; eight parameters are + estimated;\`warpMatrix\` is \f$3\times 3\f$. +@param criteria parameter, specifying the termination criteria of the ECC algorithm; +criteria.epsilon defines the threshold of the increment in the correlation coefficient between two +iterations (a negative criteria.epsilon makes criteria.maxcount the only termination criterion). +Default values are shown in the declaration above. +@param inputMask An optional mask to indicate valid values of inputImage. +@param gaussFiltSize An optional value indicating size of gaussian blur filter; (DEFAULT: 5) + +The function estimates the optimum transformation (warpMatrix) with respect to ECC criterion +(@cite EP08), that is + +\f[\texttt{warpMatrix} = \arg\max_{W} \texttt{ECC}(\texttt{templateImage}(x,y),\texttt{inputImage}(x',y'))\f] + +where + +\f[\begin{bmatrix} x' \\ y' \end{bmatrix} = W \cdot \begin{bmatrix} x \\ y \\ 1 \end{bmatrix}\f] + +(the equation holds with homogeneous coordinates for homography). It returns the final enhanced +correlation coefficient, that is the correlation coefficient between the template image and the +final warped input image. When a \f$3\times 3\f$ matrix is given with motionType =0, 1 or 2, the third +row is ignored. + +Unlike findHomography and estimateRigidTransform, the function findTransformECC implements an +area-based alignment that builds on intensity similarities. In essence, the function updates the +initial transformation that roughly aligns the images. If this information is missing, the identity +warp (unity matrix) is used as an initialization. Note that if images undergo strong +displacements/rotations, an initial transformation that roughly aligns the images is necessary +(e.g., a simple euclidean/similarity transform that allows for the images showing the same image +content approximately). Use inverse warping in the second image to take an image close to the first +one, i.e. use the flag WARP_INVERSE_MAP with warpAffine or warpPerspective. See also the OpenCV +sample image_alignment.cpp that demonstrates the use of the function. Note that the function throws +an exception if algorithm does not converges. + +@sa +computeECC, estimateAffine2D, estimateAffinePartial2D, findHomography + */ +CV_EXPORTS_W double findTransformECC( InputArray templateImage, InputArray inputImage, + InputOutputArray warpMatrix, int motionType, + TermCriteria criteria, + InputArray inputMask, int gaussFiltSize); + +/** @overload */ +CV_EXPORTS +double findTransformECC(InputArray templateImage, InputArray inputImage, + InputOutputArray warpMatrix, int motionType = MOTION_AFFINE, + TermCriteria criteria = TermCriteria(TermCriteria::COUNT+TermCriteria::EPS, 50, 0.001), + InputArray inputMask = noArray()); + +/** @example samples/cpp/kalman.cpp +An example using the standard Kalman filter +*/ + +/** @brief Kalman filter class. + +The class implements a standard Kalman filter , +@cite Welch95 . However, you can modify transitionMatrix, controlMatrix, and measurementMatrix to get +an extended Kalman filter functionality. +@note In C API when CvKalman\* kalmanFilter structure is not needed anymore, it should be released +with cvReleaseKalman(&kalmanFilter) + */ +class CV_EXPORTS_W KalmanFilter +{ +public: + CV_WRAP KalmanFilter(); + /** @overload + @param dynamParams Dimensionality of the state. + @param measureParams Dimensionality of the measurement. + @param controlParams Dimensionality of the control vector. + @param type Type of the created matrices that should be CV_32F or CV_64F. + */ + CV_WRAP KalmanFilter( int dynamParams, int measureParams, int controlParams = 0, int type = CV_32F ); + + /** @brief Re-initializes Kalman filter. The previous content is destroyed. + + @param dynamParams Dimensionality of the state. + @param measureParams Dimensionality of the measurement. + @param controlParams Dimensionality of the control vector. + @param type Type of the created matrices that should be CV_32F or CV_64F. + */ + void init( int dynamParams, int measureParams, int controlParams = 0, int type = CV_32F ); + + /** @brief Computes a predicted state. + + @param control The optional input control + */ + CV_WRAP const Mat& predict( const Mat& control = Mat() ); + + /** @brief Updates the predicted state from the measurement. + + @param measurement The measured system parameters + */ + CV_WRAP const Mat& correct( const Mat& measurement ); + + CV_PROP_RW Mat statePre; //!< predicted state (x'(k)): x(k)=A*x(k-1)+B*u(k) + CV_PROP_RW Mat statePost; //!< corrected state (x(k)): x(k)=x'(k)+K(k)*(z(k)-H*x'(k)) + CV_PROP_RW Mat transitionMatrix; //!< state transition matrix (A) + CV_PROP_RW Mat controlMatrix; //!< control matrix (B) (not used if there is no control) + CV_PROP_RW Mat measurementMatrix; //!< measurement matrix (H) + CV_PROP_RW Mat processNoiseCov; //!< process noise covariance matrix (Q) + CV_PROP_RW Mat measurementNoiseCov;//!< measurement noise covariance matrix (R) + CV_PROP_RW Mat errorCovPre; //!< priori error estimate covariance matrix (P'(k)): P'(k)=A*P(k-1)*At + Q)*/ + CV_PROP_RW Mat gain; //!< Kalman gain matrix (K(k)): K(k)=P'(k)*Ht*inv(H*P'(k)*Ht+R) + CV_PROP_RW Mat errorCovPost; //!< posteriori error estimate covariance matrix (P(k)): P(k)=(I-K(k)*H)*P'(k) + + // temporary matrices + Mat temp1; + Mat temp2; + Mat temp3; + Mat temp4; + Mat temp5; +}; + + +/** @brief Read a .flo file + + @param path Path to the file to be loaded + + The function readOpticalFlow loads a flow field from a file and returns it as a single matrix. + Resulting Mat has a type CV_32FC2 - floating-point, 2-channel. First channel corresponds to the + flow in the horizontal direction (u), second - vertical (v). + */ +CV_EXPORTS_W Mat readOpticalFlow( const String& path ); +/** @brief Write a .flo to disk + + @param path Path to the file to be written + @param flow Flow field to be stored + + The function stores a flow field in a file, returns true on success, false otherwise. + The flow field must be a 2-channel, floating-point matrix (CV_32FC2). First channel corresponds + to the flow in the horizontal direction (u), second - vertical (v). + */ +CV_EXPORTS_W bool writeOpticalFlow( const String& path, InputArray flow ); + +/** + Base class for dense optical flow algorithms +*/ +class CV_EXPORTS_W DenseOpticalFlow : public Algorithm +{ +public: + /** @brief Calculates an optical flow. + + @param I0 first 8-bit single-channel input image. + @param I1 second input image of the same size and the same type as prev. + @param flow computed flow image that has the same size as prev and type CV_32FC2. + */ + CV_WRAP virtual void calc( InputArray I0, InputArray I1, InputOutputArray flow ) = 0; + /** @brief Releases all inner buffers. + */ + CV_WRAP virtual void collectGarbage() = 0; +}; + +/** @brief Base interface for sparse optical flow algorithms. + */ +class CV_EXPORTS_W SparseOpticalFlow : public Algorithm +{ +public: + /** @brief Calculates a sparse optical flow. + + @param prevImg First input image. + @param nextImg Second input image of the same size and the same type as prevImg. + @param prevPts Vector of 2D points for which the flow needs to be found. + @param nextPts Output vector of 2D points containing the calculated new positions of input features in the second image. + @param status Output status vector. Each element of the vector is set to 1 if the + flow for the corresponding features has been found. Otherwise, it is set to 0. + @param err Optional output vector that contains error response for each point (inverse confidence). + */ + CV_WRAP virtual void calc(InputArray prevImg, InputArray nextImg, + InputArray prevPts, InputOutputArray nextPts, + OutputArray status, + OutputArray err = cv::noArray()) = 0; +}; + + +/** @brief Class computing a dense optical flow using the Gunnar Farneback's algorithm. + */ +class CV_EXPORTS_W FarnebackOpticalFlow : public DenseOpticalFlow +{ +public: + CV_WRAP virtual int getNumLevels() const = 0; + CV_WRAP virtual void setNumLevels(int numLevels) = 0; + + CV_WRAP virtual double getPyrScale() const = 0; + CV_WRAP virtual void setPyrScale(double pyrScale) = 0; + + CV_WRAP virtual bool getFastPyramids() const = 0; + CV_WRAP virtual void setFastPyramids(bool fastPyramids) = 0; + + CV_WRAP virtual int getWinSize() const = 0; + CV_WRAP virtual void setWinSize(int winSize) = 0; + + CV_WRAP virtual int getNumIters() const = 0; + CV_WRAP virtual void setNumIters(int numIters) = 0; + + CV_WRAP virtual int getPolyN() const = 0; + CV_WRAP virtual void setPolyN(int polyN) = 0; + + CV_WRAP virtual double getPolySigma() const = 0; + CV_WRAP virtual void setPolySigma(double polySigma) = 0; + + CV_WRAP virtual int getFlags() const = 0; + CV_WRAP virtual void setFlags(int flags) = 0; + + CV_WRAP static Ptr create( + int numLevels = 5, + double pyrScale = 0.5, + bool fastPyramids = false, + int winSize = 13, + int numIters = 10, + int polyN = 5, + double polySigma = 1.1, + int flags = 0); +}; + +/** @brief Variational optical flow refinement + +This class implements variational refinement of the input flow field, i.e. +it uses input flow to initialize the minimization of the following functional: +\f$E(U) = \int_{\Omega} \delta \Psi(E_I) + \gamma \Psi(E_G) + \alpha \Psi(E_S) \f$, +where \f$E_I,E_G,E_S\f$ are color constancy, gradient constancy and smoothness terms +respectively. \f$\Psi(s^2)=\sqrt{s^2+\epsilon^2}\f$ is a robust penalizer to limit the +influence of outliers. A complete formulation and a description of the minimization +procedure can be found in @cite Brox2004 +*/ +class CV_EXPORTS_W VariationalRefinement : public DenseOpticalFlow +{ +public: + /** @brief @ref calc function overload to handle separate horizontal (u) and vertical (v) flow components + (to avoid extra splits/merges) */ + CV_WRAP virtual void calcUV(InputArray I0, InputArray I1, InputOutputArray flow_u, InputOutputArray flow_v) = 0; + + /** @brief Number of outer (fixed-point) iterations in the minimization procedure. + @see setFixedPointIterations */ + CV_WRAP virtual int getFixedPointIterations() const = 0; + /** @copybrief getFixedPointIterations @see getFixedPointIterations */ + CV_WRAP virtual void setFixedPointIterations(int val) = 0; + + /** @brief Number of inner successive over-relaxation (SOR) iterations + in the minimization procedure to solve the respective linear system. + @see setSorIterations */ + CV_WRAP virtual int getSorIterations() const = 0; + /** @copybrief getSorIterations @see getSorIterations */ + CV_WRAP virtual void setSorIterations(int val) = 0; + + /** @brief Relaxation factor in SOR + @see setOmega */ + CV_WRAP virtual float getOmega() const = 0; + /** @copybrief getOmega @see getOmega */ + CV_WRAP virtual void setOmega(float val) = 0; + + /** @brief Weight of the smoothness term + @see setAlpha */ + CV_WRAP virtual float getAlpha() const = 0; + /** @copybrief getAlpha @see getAlpha */ + CV_WRAP virtual void setAlpha(float val) = 0; + + /** @brief Weight of the color constancy term + @see setDelta */ + CV_WRAP virtual float getDelta() const = 0; + /** @copybrief getDelta @see getDelta */ + CV_WRAP virtual void setDelta(float val) = 0; + + /** @brief Weight of the gradient constancy term + @see setGamma */ + CV_WRAP virtual float getGamma() const = 0; + /** @copybrief getGamma @see getGamma */ + CV_WRAP virtual void setGamma(float val) = 0; + + /** @brief Creates an instance of VariationalRefinement + */ + CV_WRAP static Ptr create(); +}; + +/** @brief DIS optical flow algorithm. + +This class implements the Dense Inverse Search (DIS) optical flow algorithm. More +details about the algorithm can be found at @cite Kroeger2016 . Includes three presets with preselected +parameters to provide reasonable trade-off between speed and quality. However, even the slowest preset is +still relatively fast, use DeepFlow if you need better quality and don't care about speed. + +This implementation includes several additional features compared to the algorithm described in the paper, +including spatial propagation of flow vectors (@ref getUseSpatialPropagation), as well as an option to +utilize an initial flow approximation passed to @ref calc (which is, essentially, temporal propagation, +if the previous frame's flow field is passed). +*/ +class CV_EXPORTS_W DISOpticalFlow : public DenseOpticalFlow +{ +public: + enum + { + PRESET_ULTRAFAST = 0, + PRESET_FAST = 1, + PRESET_MEDIUM = 2 + }; + + /** @brief Finest level of the Gaussian pyramid on which the flow is computed (zero level + corresponds to the original image resolution). The final flow is obtained by bilinear upscaling. + @see setFinestScale */ + CV_WRAP virtual int getFinestScale() const = 0; + /** @copybrief getFinestScale @see getFinestScale */ + CV_WRAP virtual void setFinestScale(int val) = 0; + + /** @brief Size of an image patch for matching (in pixels). Normally, default 8x8 patches work well + enough in most cases. + @see setPatchSize */ + CV_WRAP virtual int getPatchSize() const = 0; + /** @copybrief getPatchSize @see getPatchSize */ + CV_WRAP virtual void setPatchSize(int val) = 0; + + /** @brief Stride between neighbor patches. Must be less than patch size. Lower values correspond + to higher flow quality. + @see setPatchStride */ + CV_WRAP virtual int getPatchStride() const = 0; + /** @copybrief getPatchStride @see getPatchStride */ + CV_WRAP virtual void setPatchStride(int val) = 0; + + /** @brief Maximum number of gradient descent iterations in the patch inverse search stage. Higher values + may improve quality in some cases. + @see setGradientDescentIterations */ + CV_WRAP virtual int getGradientDescentIterations() const = 0; + /** @copybrief getGradientDescentIterations @see getGradientDescentIterations */ + CV_WRAP virtual void setGradientDescentIterations(int val) = 0; + + /** @brief Number of fixed point iterations of variational refinement per scale. Set to zero to + disable variational refinement completely. Higher values will typically result in more smooth and + high-quality flow. + @see setGradientDescentIterations */ + CV_WRAP virtual int getVariationalRefinementIterations() const = 0; + /** @copybrief getGradientDescentIterations @see getGradientDescentIterations */ + CV_WRAP virtual void setVariationalRefinementIterations(int val) = 0; + + /** @brief Weight of the smoothness term + @see setVariationalRefinementAlpha */ + CV_WRAP virtual float getVariationalRefinementAlpha() const = 0; + /** @copybrief getVariationalRefinementAlpha @see getVariationalRefinementAlpha */ + CV_WRAP virtual void setVariationalRefinementAlpha(float val) = 0; + + /** @brief Weight of the color constancy term + @see setVariationalRefinementDelta */ + CV_WRAP virtual float getVariationalRefinementDelta() const = 0; + /** @copybrief getVariationalRefinementDelta @see getVariationalRefinementDelta */ + CV_WRAP virtual void setVariationalRefinementDelta(float val) = 0; + + /** @brief Weight of the gradient constancy term + @see setVariationalRefinementGamma */ + CV_WRAP virtual float getVariationalRefinementGamma() const = 0; + /** @copybrief getVariationalRefinementGamma @see getVariationalRefinementGamma */ + CV_WRAP virtual void setVariationalRefinementGamma(float val) = 0; + + + /** @brief Whether to use mean-normalization of patches when computing patch distance. It is turned on + by default as it typically provides a noticeable quality boost because of increased robustness to + illumination variations. Turn it off if you are certain that your sequence doesn't contain any changes + in illumination. + @see setUseMeanNormalization */ + CV_WRAP virtual bool getUseMeanNormalization() const = 0; + /** @copybrief getUseMeanNormalization @see getUseMeanNormalization */ + CV_WRAP virtual void setUseMeanNormalization(bool val) = 0; + + /** @brief Whether to use spatial propagation of good optical flow vectors. This option is turned on by + default, as it tends to work better on average and can sometimes help recover from major errors + introduced by the coarse-to-fine scheme employed by the DIS optical flow algorithm. Turning this + option off can make the output flow field a bit smoother, however. + @see setUseSpatialPropagation */ + CV_WRAP virtual bool getUseSpatialPropagation() const = 0; + /** @copybrief getUseSpatialPropagation @see getUseSpatialPropagation */ + CV_WRAP virtual void setUseSpatialPropagation(bool val) = 0; + + /** @brief Creates an instance of DISOpticalFlow + + @param preset one of PRESET_ULTRAFAST, PRESET_FAST and PRESET_MEDIUM + */ + CV_WRAP static Ptr create(int preset = DISOpticalFlow::PRESET_FAST); +}; + +/** @brief Class used for calculating a sparse optical flow. + +The class can calculate an optical flow for a sparse feature set using the +iterative Lucas-Kanade method with pyramids. + +@sa calcOpticalFlowPyrLK + +*/ +class CV_EXPORTS_W SparsePyrLKOpticalFlow : public SparseOpticalFlow +{ +public: + CV_WRAP virtual Size getWinSize() const = 0; + CV_WRAP virtual void setWinSize(Size winSize) = 0; + + CV_WRAP virtual int getMaxLevel() const = 0; + CV_WRAP virtual void setMaxLevel(int maxLevel) = 0; + + CV_WRAP virtual TermCriteria getTermCriteria() const = 0; + CV_WRAP virtual void setTermCriteria(TermCriteria& crit) = 0; + + CV_WRAP virtual int getFlags() const = 0; + CV_WRAP virtual void setFlags(int flags) = 0; + + CV_WRAP virtual double getMinEigThreshold() const = 0; + CV_WRAP virtual void setMinEigThreshold(double minEigThreshold) = 0; + + CV_WRAP static Ptr create( + Size winSize = Size(21, 21), + int maxLevel = 3, TermCriteria crit = + TermCriteria(TermCriteria::COUNT+TermCriteria::EPS, 30, 0.01), + int flags = 0, + double minEigThreshold = 1e-4); +}; + + + + +/** @brief Base abstract class for the long-term tracker + */ +class CV_EXPORTS_W Tracker +{ +protected: + Tracker(); +public: + virtual ~Tracker(); + + /** @brief Initialize the tracker with a known bounding box that surrounded the target + @param image The initial frame + @param boundingBox The initial bounding box + */ + CV_WRAP virtual + void init(InputArray image, const Rect& boundingBox) = 0; + + /** @brief Update the tracker, find the new most likely bounding box for the target + @param image The current frame + @param boundingBox The bounding box that represent the new target location, if true was returned, not + modified otherwise + + @return True means that target was located and false means that tracker cannot locate target in + current frame. Note, that latter *does not* imply that tracker has failed, maybe target is indeed + missing from the frame (say, out of sight) + */ + CV_WRAP virtual + bool update(InputArray image, CV_OUT Rect& boundingBox) = 0; +}; + + + +/** @brief The MIL algorithm trains a classifier in an online manner to separate the object from the +background. + +Multiple Instance Learning avoids the drift problem for a robust tracking. The implementation is +based on @cite MIL . + +Original code can be found here + */ +class CV_EXPORTS_W TrackerMIL : public Tracker +{ +protected: + TrackerMIL(); // use ::create() +public: + virtual ~TrackerMIL() CV_OVERRIDE; + + struct CV_EXPORTS_W_SIMPLE Params + { + CV_WRAP Params(); + //parameters for sampler + CV_PROP_RW float samplerInitInRadius; //!< radius for gathering positive instances during init + CV_PROP_RW int samplerInitMaxNegNum; //!< # negative samples to use during init + CV_PROP_RW float samplerSearchWinSize; //!< size of search window + CV_PROP_RW float samplerTrackInRadius; //!< radius for gathering positive instances during tracking + CV_PROP_RW int samplerTrackMaxPosNum; //!< # positive samples to use during tracking + CV_PROP_RW int samplerTrackMaxNegNum; //!< # negative samples to use during tracking + CV_PROP_RW int featureSetNumFeatures; //!< # features + }; + + /** @brief Create MIL tracker instance + * @param parameters MIL parameters TrackerMIL::Params + */ + static CV_WRAP + Ptr create(const TrackerMIL::Params ¶meters = TrackerMIL::Params()); + + //void init(InputArray image, const Rect& boundingBox) CV_OVERRIDE; + //bool update(InputArray image, CV_OUT Rect& boundingBox) CV_OVERRIDE; +}; + + + +/** @brief the GOTURN (Generic Object Tracking Using Regression Networks) tracker + * + * GOTURN (@cite GOTURN) is kind of trackers based on Convolutional Neural Networks (CNN). While taking all advantages of CNN trackers, + * GOTURN is much faster due to offline training without online fine-tuning nature. + * GOTURN tracker addresses the problem of single target tracking: given a bounding box label of an object in the first frame of the video, + * we track that object through the rest of the video. NOTE: Current method of GOTURN does not handle occlusions; however, it is fairly + * robust to viewpoint changes, lighting changes, and deformations. + * Inputs of GOTURN are two RGB patches representing Target and Search patches resized to 227x227. + * Outputs of GOTURN are predicted bounding box coordinates, relative to Search patch coordinate system, in format X1,Y1,X2,Y2. + * Original paper is here: + * As long as original authors implementation: + * Implementation of training algorithm is placed in separately here due to 3d-party dependencies: + * + * GOTURN architecture goturn.prototxt and trained model goturn.caffemodel are accessible on opencv_extra GitHub repository. + */ +class CV_EXPORTS_W TrackerGOTURN : public Tracker +{ +protected: + TrackerGOTURN(); // use ::create() +public: + virtual ~TrackerGOTURN() CV_OVERRIDE; + + struct CV_EXPORTS_W_SIMPLE Params + { + CV_WRAP Params(); + CV_PROP_RW std::string modelTxt; + CV_PROP_RW std::string modelBin; + }; + + /** @brief Constructor + @param parameters GOTURN parameters TrackerGOTURN::Params + */ + static CV_WRAP + Ptr create(const TrackerGOTURN::Params& parameters = TrackerGOTURN::Params()); + + //void init(InputArray image, const Rect& boundingBox) CV_OVERRIDE; + //bool update(InputArray image, CV_OUT Rect& boundingBox) CV_OVERRIDE; +}; + + + +//! @} video_track + +} // cv + +#endif diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/video/video.hpp b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/video/video.hpp new file mode 100644 index 0000000..8267b85 --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/video/video.hpp @@ -0,0 +1,48 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Copyright (C) 2013, OpenCV Foundation, all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifdef __OPENCV_BUILD +#error this is a compatibility header which should not be used inside the OpenCV library +#endif + +#include "opencv2/video.hpp" diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/world.hpp b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/world.hpp new file mode 100644 index 0000000..4902c2f --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Headers/world.hpp @@ -0,0 +1,58 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009-2010, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef OPENCV_WORLD_HPP +#define OPENCV_WORLD_HPP + +#include "opencv2/core.hpp" + +#ifdef __cplusplus +namespace cv +{ + +CV_EXPORTS_W bool initAll(); + +} + +#endif + +#endif diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Resources/Info.plist b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Resources/Info.plist new file mode 100644 index 0000000..48c337e --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/Resources/Info.plist @@ -0,0 +1,18 @@ + + + + + CFBundleName + OpenCV + CFBundleIdentifier + org.opencv + CFBundleVersion + 4.5.1 + CFBundleShortVersionString + 4.5.1 + CFBundleSignature + ???? + CFBundlePackageType + FMWK + + diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/opencv2 b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/opencv2 new file mode 100644 index 0000000..d3ba809 Binary files /dev/null and b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/A/opencv2 differ diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/Current b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/Current new file mode 100644 index 0000000..8c7e5a6 --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/Versions/Current @@ -0,0 +1 @@ +A \ No newline at end of file diff --git a/iOS_TNN_Demo/TNNDemo/opencv2.framework/opencv2 b/iOS_TNN_Demo/TNNDemo/opencv2.framework/opencv2 new file mode 100644 index 0000000..5dfe120 --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/opencv2.framework/opencv2 @@ -0,0 +1 @@ +Versions/Current/opencv2 \ No newline at end of file diff --git a/iOS_TNN_Demo/TNNDemo/res/nanodet_sim_opt.tnnmodel b/iOS_TNN_Demo/TNNDemo/res/nanodet_sim_opt.tnnmodel new file mode 100644 index 0000000..19084aa Binary files /dev/null and b/iOS_TNN_Demo/TNNDemo/res/nanodet_sim_opt.tnnmodel differ diff --git a/iOS_TNN_Demo/TNNDemo/res/nanodet_sim_opt.tnnproto b/iOS_TNN_Demo/TNNDemo/res/nanodet_sim_opt.tnnproto new file mode 100644 index 0000000..1af2091 --- /dev/null +++ b/iOS_TNN_Demo/TNNDemo/res/nanodet_sim_opt.tnnproto @@ -0,0 +1,198 @@ +"1 210 1 4206624770 ," +"input.1 1 3 320 320 ," +" 1002 1005 1008 1011 1014 1017 1020 1023 1026 1029 1032 1035 1038 424 425 430 433 438 439 444 445 446 449 454 455 460 461 462 465 470 471 476 477 478 481 486 487 492 497 500 505 506 511 512 513 516 521 522 527 528 529 532 537 538 543 544 545 548 553 554 559 560 561 564 569 570 575 576 577 580 585 586 591 592 593 596 601 602 607 608 609 612 617 618 623 628 631 636 637 642 643 644 647 652 653 658 659 660 663 668 669 674 675 676 679 684 685 690 691 692 693 712 713 732 733 752 753 772 773 776 779 782 785 786 787 788 789 791 792 794 795 798 801 804 807 808 809 810 811 813 814 816 817 820 823 826 829 830 831 832 833 835 836 838 839 840 843 846 849 852 855 858 861 864 867 870 873 876 879 882 885 888 891 894 897 900 903 906 909 912 915 918 921 924 927 930 933 936 939 942 945 948 951 954 957 960 963 966 969 972 975 978 981 984 987 990 993 996 999 input.1 ," +"792 814 836 795 817 839 ," +" 193 ," +"Convolution Conv_0 1 1 input.1 840 1 3 24 3 3 2 2 1 1 1 -1 1 1 ," +"PReLU LeakyRelu_1 1 1 840 424 1 0 ," +"Pooling MaxPool_2 1 1 424 425 0 3 3 2 2 1 1 -1 -1 -1 0 ," +"Convolution Conv_3 1 1 425 843 24 1 24 3 3 2 2 1 1 1 -1 1 1 ," +"Convolution Conv_4 1 1 843 846 1 24 58 1 1 1 1 0 0 1 -1 1 1 ," +"PReLU LeakyRelu_5 1 1 846 430 1 0 ," +"Convolution Conv_6 1 1 425 849 1 24 58 1 1 1 1 0 0 1 -1 1 1 ," +"PReLU LeakyRelu_7 1 1 849 433 1 0 ," +"Convolution Conv_8 1 1 433 852 58 1 58 3 3 2 2 1 1 1 -1 1 1 ," +"Convolution Conv_9 1 1 852 855 1 58 58 1 1 1 1 0 0 1 -1 1 1 ," +"PReLU LeakyRelu_10 1 1 855 438 1 0 ," +"Concat Concat_11 2 1 430 438 439 1 ," +"ShuffleChannel Reshape_16 1 1 439 444 2 ," +"SplitV Split_17 1 2 444 445 446 1 2 58 58 ," +"Convolution Conv_18 1 1 446 858 1 58 58 1 1 1 1 0 0 1 -1 1 1 ," +"PReLU LeakyRelu_19 1 1 858 449 1 0 ," +"Convolution Conv_20 1 1 449 861 58 1 58 3 3 1 1 1 1 1 -1 1 1 ," +"Convolution Conv_21 1 1 861 864 1 58 58 1 1 1 1 0 0 1 -1 1 1 ," +"PReLU LeakyRelu_22 1 1 864 454 1 0 ," +"Concat Concat_23 2 1 445 454 455 1 ," +"ShuffleChannel Reshape_28 1 1 455 460 2 ," +"SplitV Split_29 1 2 460 461 462 1 2 58 58 ," +"Convolution Conv_30 1 1 462 867 1 58 58 1 1 1 1 0 0 1 -1 1 1 ," +"PReLU LeakyRelu_31 1 1 867 465 1 0 ," +"Convolution Conv_32 1 1 465 870 58 1 58 3 3 1 1 1 1 1 -1 1 1 ," +"Convolution Conv_33 1 1 870 873 1 58 58 1 1 1 1 0 0 1 -1 1 1 ," +"PReLU LeakyRelu_34 1 1 873 470 1 0 ," +"Concat Concat_35 2 1 461 470 471 1 ," +"ShuffleChannel Reshape_40 1 1 471 476 2 ," +"SplitV Split_41 1 2 476 477 478 1 2 58 58 ," +"Convolution Conv_42 1 1 478 876 1 58 58 1 1 1 1 0 0 1 -1 1 1 ," +"PReLU LeakyRelu_43 1 1 876 481 1 0 ," +"Convolution Conv_44 1 1 481 879 58 1 58 3 3 1 1 1 1 1 -1 1 1 ," +"Convolution Conv_45 1 1 879 882 1 58 58 1 1 1 1 0 0 1 -1 1 1 ," +"PReLU LeakyRelu_46 1 1 882 486 1 0 ," +"Concat Concat_47 2 1 477 486 487 1 ," +"ShuffleChannel Reshape_52 1 1 487 492 2 ," +"Convolution Conv_53 1 1 492 885 116 1 116 3 3 2 2 1 1 1 -1 1 1 ," +"Convolution Conv_54 1 1 885 888 1 116 116 1 1 1 1 0 0 1 -1 1 1 ," +"PReLU LeakyRelu_55 1 1 888 497 1 0 ," +"Convolution Conv_56 1 1 492 891 1 116 116 1 1 1 1 0 0 1 -1 1 1 ," +"PReLU LeakyRelu_57 1 1 891 500 1 0 ," +"Convolution Conv_58 1 1 500 894 116 1 116 3 3 2 2 1 1 1 -1 1 1 ," +"Convolution Conv_59 1 1 894 897 1 116 116 1 1 1 1 0 0 1 -1 1 1 ," +"PReLU LeakyRelu_60 1 1 897 505 1 0 ," +"Concat Concat_61 2 1 497 505 506 1 ," +"ShuffleChannel Reshape_66 1 1 506 511 2 ," +"SplitV Split_67 1 2 511 512 513 1 2 116 116 ," +"Convolution Conv_68 1 1 513 900 1 116 116 1 1 1 1 0 0 1 -1 1 1 ," +"PReLU LeakyRelu_69 1 1 900 516 1 0 ," +"Convolution Conv_70 1 1 516 903 116 1 116 3 3 1 1 1 1 1 -1 1 1 ," +"Convolution Conv_71 1 1 903 906 1 116 116 1 1 1 1 0 0 1 -1 1 1 ," +"PReLU LeakyRelu_72 1 1 906 521 1 0 ," +"Concat Concat_73 2 1 512 521 522 1 ," +"ShuffleChannel Reshape_78 1 1 522 527 2 ," +"SplitV Split_79 1 2 527 528 529 1 2 116 116 ," +"Convolution Conv_80 1 1 529 909 1 116 116 1 1 1 1 0 0 1 -1 1 1 ," +"PReLU LeakyRelu_81 1 1 909 532 1 0 ," +"Convolution Conv_82 1 1 532 912 116 1 116 3 3 1 1 1 1 1 -1 1 1 ," +"Convolution Conv_83 1 1 912 915 1 116 116 1 1 1 1 0 0 1 -1 1 1 ," +"PReLU LeakyRelu_84 1 1 915 537 1 0 ," +"Concat Concat_85 2 1 528 537 538 1 ," +"ShuffleChannel Reshape_90 1 1 538 543 2 ," +"SplitV Split_91 1 2 543 544 545 1 2 116 116 ," +"Convolution Conv_92 1 1 545 918 1 116 116 1 1 1 1 0 0 1 -1 1 1 ," +"PReLU LeakyRelu_93 1 1 918 548 1 0 ," +"Convolution Conv_94 1 1 548 921 116 1 116 3 3 1 1 1 1 1 -1 1 1 ," +"Convolution Conv_95 1 1 921 924 1 116 116 1 1 1 1 0 0 1 -1 1 1 ," +"PReLU LeakyRelu_96 1 1 924 553 1 0 ," +"Concat Concat_97 2 1 544 553 554 1 ," +"ShuffleChannel Reshape_102 1 1 554 559 2 ," +"SplitV Split_103 1 2 559 560 561 1 2 116 116 ," +"Convolution Conv_104 1 1 561 927 1 116 116 1 1 1 1 0 0 1 -1 1 1 ," +"PReLU LeakyRelu_105 1 1 927 564 1 0 ," +"Convolution Conv_106 1 1 564 930 116 1 116 3 3 1 1 1 1 1 -1 1 1 ," +"Convolution Conv_107 1 1 930 933 1 116 116 1 1 1 1 0 0 1 -1 1 1 ," +"PReLU LeakyRelu_108 1 1 933 569 1 0 ," +"Concat Concat_109 2 1 560 569 570 1 ," +"ShuffleChannel Reshape_114 1 1 570 575 2 ," +"SplitV Split_115 1 2 575 576 577 1 2 116 116 ," +"Convolution Conv_116 1 1 577 936 1 116 116 1 1 1 1 0 0 1 -1 1 1 ," +"PReLU LeakyRelu_117 1 1 936 580 1 0 ," +"Convolution Conv_118 1 1 580 939 116 1 116 3 3 1 1 1 1 1 -1 1 1 ," +"Convolution Conv_119 1 1 939 942 1 116 116 1 1 1 1 0 0 1 -1 1 1 ," +"PReLU LeakyRelu_120 1 1 942 585 1 0 ," +"Concat Concat_121 2 1 576 585 586 1 ," +"ShuffleChannel Reshape_126 1 1 586 591 2 ," +"SplitV Split_127 1 2 591 592 593 1 2 116 116 ," +"Convolution Conv_128 1 1 593 945 1 116 116 1 1 1 1 0 0 1 -1 1 1 ," +"PReLU LeakyRelu_129 1 1 945 596 1 0 ," +"Convolution Conv_130 1 1 596 948 116 1 116 3 3 1 1 1 1 1 -1 1 1 ," +"Convolution Conv_131 1 1 948 951 1 116 116 1 1 1 1 0 0 1 -1 1 1 ," +"PReLU LeakyRelu_132 1 1 951 601 1 0 ," +"Concat Concat_133 2 1 592 601 602 1 ," +"ShuffleChannel Reshape_138 1 1 602 607 2 ," +"SplitV Split_139 1 2 607 608 609 1 2 116 116 ," +"Convolution Conv_140 1 1 609 954 1 116 116 1 1 1 1 0 0 1 -1 1 1 ," +"PReLU LeakyRelu_141 1 1 954 612 1 0 ," +"Convolution Conv_142 1 1 612 957 116 1 116 3 3 1 1 1 1 1 -1 1 1 ," +"Convolution Conv_143 1 1 957 960 1 116 116 1 1 1 1 0 0 1 -1 1 1 ," +"PReLU LeakyRelu_144 1 1 960 617 1 0 ," +"Concat Concat_145 2 1 608 617 618 1 ," +"ShuffleChannel Reshape_150 1 1 618 623 2 ," +"Convolution Conv_151 1 1 623 963 232 1 232 3 3 2 2 1 1 1 -1 1 1 ," +"Convolution Conv_152 1 1 963 966 1 232 232 1 1 1 1 0 0 1 -1 1 1 ," +"PReLU LeakyRelu_153 1 1 966 628 1 0 ," +"Convolution Conv_154 1 1 623 969 1 232 232 1 1 1 1 0 0 1 -1 1 1 ," +"PReLU LeakyRelu_155 1 1 969 631 1 0 ," +"Convolution Conv_156 1 1 631 972 232 1 232 3 3 2 2 1 1 1 -1 1 1 ," +"Convolution Conv_157 1 1 972 975 1 232 232 1 1 1 1 0 0 1 -1 1 1 ," +"PReLU LeakyRelu_158 1 1 975 636 1 0 ," +"Concat Concat_159 2 1 628 636 637 1 ," +"ShuffleChannel Reshape_164 1 1 637 642 2 ," +"SplitV Split_165 1 2 642 643 644 1 2 232 232 ," +"Convolution Conv_166 1 1 644 978 1 232 232 1 1 1 1 0 0 1 -1 1 1 ," +"PReLU LeakyRelu_167 1 1 978 647 1 0 ," +"Convolution Conv_168 1 1 647 981 232 1 232 3 3 1 1 1 1 1 -1 1 1 ," +"Convolution Conv_169 1 1 981 984 1 232 232 1 1 1 1 0 0 1 -1 1 1 ," +"PReLU LeakyRelu_170 1 1 984 652 1 0 ," +"Concat Concat_171 2 1 643 652 653 1 ," +"ShuffleChannel Reshape_176 1 1 653 658 2 ," +"SplitV Split_177 1 2 658 659 660 1 2 232 232 ," +"Convolution Conv_178 1 1 660 987 1 232 232 1 1 1 1 0 0 1 -1 1 1 ," +"PReLU LeakyRelu_179 1 1 987 663 1 0 ," +"Convolution Conv_180 1 1 663 990 232 1 232 3 3 1 1 1 1 1 -1 1 1 ," +"Convolution Conv_181 1 1 990 993 1 232 232 1 1 1 1 0 0 1 -1 1 1 ," +"PReLU LeakyRelu_182 1 1 993 668 1 0 ," +"Concat Concat_183 2 1 659 668 669 1 ," +"ShuffleChannel Reshape_188 1 1 669 674 2 ," +"SplitV Split_189 1 2 674 675 676 1 2 232 232 ," +"Convolution Conv_190 1 1 676 996 1 232 232 1 1 1 1 0 0 1 -1 1 1 ," +"PReLU LeakyRelu_191 1 1 996 679 1 0 ," +"Convolution Conv_192 1 1 679 999 232 1 232 3 3 1 1 1 1 1 -1 1 1 ," +"Convolution Conv_193 1 1 999 1002 1 232 232 1 1 1 1 0 0 1 -1 1 1 ," +"PReLU LeakyRelu_194 1 1 1002 684 1 0 ," +"Concat Concat_195 2 1 675 684 685 1 ," +"ShuffleChannel Reshape_200 1 1 685 690 2 ," +"Convolution Conv_201 1 1 492 691 1 116 96 1 1 1 1 0 0 1 -1 1 1 ," +"Convolution Conv_202 1 1 623 692 1 232 96 1 1 1 1 0 0 1 -1 1 1 ," +"Convolution Conv_203 1 1 690 693 1 464 96 1 1 1 1 0 0 1 -1 1 1 ," +"Upsample Resize_222 1 1 693 712 2 0 0 0 20 20 ," +"Add Add_223 2 1 692 712 713 ," +"Upsample Resize_242 1 1 713 732 2 0 0 0 40 40 ," +"Add Add_243 2 1 691 732 733 ," +"Upsample Resize_262 1 1 733 752 2 0 0 0 20 20 ," +"Add Add_263 2 1 713 752 753 ," +"Upsample Resize_282 1 1 753 772 2 0 0 0 10 10 ," +"Add Add_283 2 1 693 772 773 ," +"Convolution Conv_284 1 1 733 1005 96 1 96 3 3 1 1 1 1 1 -1 1 1 ," +"PReLU LeakyRelu_285 1 1 1005 776 1 0 ," +"Convolution Conv_286 1 1 776 1008 1 96 96 1 1 1 1 0 0 1 -1 1 1 ," +"PReLU LeakyRelu_287 1 1 1008 779 1 0 ," +"Convolution Conv_288 1 1 779 1011 96 1 96 3 3 1 1 1 1 1 -1 1 1 ," +"PReLU LeakyRelu_289 1 1 1011 782 1 0 ," +"Convolution Conv_290 1 1 782 1014 1 96 96 1 1 1 1 0 0 1 -1 1 1 ," +"PReLU LeakyRelu_291 1 1 1014 785 1 0 ," +"Convolution Conv_292 1 1 785 786 1 96 112 1 1 1 1 0 0 1 -1 1 1 ," +"SplitV Split_293 1 2 786 787 788 1 2 80 32 ," +"Sigmoid Sigmoid_294 1 1 787 789 ," +"Reshape Reshape_296 1 1 789 791 0 4 4 0 80 -1 1 ," +"Permute Transpose_297 1 1 791 792 3 0 2 1 ," +"Reshape Reshape_299 1 1 788 794 0 4 4 0 32 -1 1 ," +"Permute Transpose_300 1 1 794 795 3 0 2 1 ," +"Convolution Conv_301 1 1 753 1017 96 1 96 3 3 1 1 1 1 1 -1 1 1 ," +"PReLU LeakyRelu_302 1 1 1017 798 1 0 ," +"Convolution Conv_303 1 1 798 1020 1 96 96 1 1 1 1 0 0 1 -1 1 1 ," +"PReLU LeakyRelu_304 1 1 1020 801 1 0 ," +"Convolution Conv_305 1 1 801 1023 96 1 96 3 3 1 1 1 1 1 -1 1 1 ," +"PReLU LeakyRelu_306 1 1 1023 804 1 0 ," +"Convolution Conv_307 1 1 804 1026 1 96 96 1 1 1 1 0 0 1 -1 1 1 ," +"PReLU LeakyRelu_308 1 1 1026 807 1 0 ," +"Convolution Conv_309 1 1 807 808 1 96 112 1 1 1 1 0 0 1 -1 1 1 ," +"SplitV Split_310 1 2 808 809 810 1 2 80 32 ," +"Sigmoid Sigmoid_311 1 1 809 811 ," +"Reshape Reshape_313 1 1 811 813 0 4 4 0 80 -1 1 ," +"Permute Transpose_314 1 1 813 814 3 0 2 1 ," +"Reshape Reshape_316 1 1 810 816 0 4 4 0 32 -1 1 ," +"Permute Transpose_317 1 1 816 817 3 0 2 1 ," +"Convolution Conv_318 1 1 773 1029 96 1 96 3 3 1 1 1 1 1 -1 1 1 ," +"PReLU LeakyRelu_319 1 1 1029 820 1 0 ," +"Convolution Conv_320 1 1 820 1032 1 96 96 1 1 1 1 0 0 1 -1 1 1 ," +"PReLU LeakyRelu_321 1 1 1032 823 1 0 ," +"Convolution Conv_322 1 1 823 1035 96 1 96 3 3 1 1 1 1 1 -1 1 1 ," +"PReLU LeakyRelu_323 1 1 1035 826 1 0 ," +"Convolution Conv_324 1 1 826 1038 1 96 96 1 1 1 1 0 0 1 -1 1 1 ," +"PReLU LeakyRelu_325 1 1 1038 829 1 0 ," +"Convolution Conv_326 1 1 829 830 1 96 112 1 1 1 1 0 0 1 -1 1 1 ," +"SplitV Split_327 1 2 830 831 832 1 2 80 32 ," +"Sigmoid Sigmoid_328 1 1 831 833 ," +"Reshape Reshape_330 1 1 833 835 0 4 4 0 80 -1 1 ," +"Permute Transpose_331 1 1 835 836 3 0 2 1 ," +"Reshape Reshape_333 1 1 832 838 0 4 4 0 32 -1 1 ," +"Permute Transpose_334 1 1 838 839 3 0 2 1 ," diff --git a/iOS_TNN_Demo/TNNDemo/tnn.bundle/tnn.metallib b/iOS_TNN_Demo/TNNDemo/tnn.bundle/tnn.metallib index 3b3664a..94ba935 100644 Binary files a/iOS_TNN_Demo/TNNDemo/tnn.bundle/tnn.metallib and b/iOS_TNN_Demo/TNNDemo/tnn.bundle/tnn.metallib differ diff --git a/iOS_TNN_Demo/TNNDemo/tnn.bundle/tnn.simulator.metallib b/iOS_TNN_Demo/TNNDemo/tnn.bundle/tnn.simulator.metallib new file mode 100644 index 0000000..d2e0a00 Binary files /dev/null and b/iOS_TNN_Demo/TNNDemo/tnn.bundle/tnn.simulator.metallib differ diff --git a/iOS_TNN_Demo/TNNDemo/tnn.framework/Headers/core/macro.h b/iOS_TNN_Demo/TNNDemo/tnn.framework/Headers/core/macro.h index 5715faa..dd9519c 100644 --- a/iOS_TNN_Demo/TNNDemo/tnn.framework/Headers/core/macro.h +++ b/iOS_TNN_Demo/TNNDemo/tnn.framework/Headers/core/macro.h @@ -17,6 +17,8 @@ #include #include +// disable Warning 4003 in MSVC: warning C4003: param not enough to call “TNN_NS_” +#pragma warning(disable:4003) // TNN namespcae #define TNN_NS__(x) tnn##x #define TNN_NS_(x) TNN_NS__(x) @@ -32,18 +34,18 @@ #ifdef BUILDING_DLL #ifdef __GNUC__ #define PUBLIC __attribute__((dllexport)) -#else +#else // __GNUC__ #define PUBLIC __declspec(dllexport) -#endif -#else +#endif // __GNUC__ +#else // BUILDING_DLL #ifdef __GNUC__ #define PUBLIC __attribute__((dllimport)) #else #define PUBLIC __declspec(dllimport) -#endif -#endif +#endif // __GNUC__ +#endif // BUILDING_DLL #define LOCAL -#else +#else // _WIN32 || __CYGWIN__ #if __GNUC__ >= 4 #define PUBLIC __attribute__((visibility("default"))) #define LOCAL __attribute__((visibility("hidden"))) diff --git a/iOS_TNN_Demo/TNNDemo/tnn.framework/Headers/utils/cpu_utils.h b/iOS_TNN_Demo/TNNDemo/tnn.framework/Headers/utils/cpu_utils.h index aed86b2..043f932 100644 --- a/iOS_TNN_Demo/TNNDemo/tnn.framework/Headers/utils/cpu_utils.h +++ b/iOS_TNN_Demo/TNNDemo/tnn.framework/Headers/utils/cpu_utils.h @@ -35,6 +35,10 @@ class CpuUtils { // @brief get cpu fp16 capability PUBLIC static bool CpuSupportFp16(); + + // @brief set x86 cpu denormal ftz and daz, no use for other cpu. + // @param denormal 0:turn off denormal 1:turn on denormal + PUBLIC static void SetCpuDenormal(int denormal); }; } // namespace TNN_NS diff --git a/iOS_TNN_Demo/TNNDemo/tnn.framework/Headers/utils/dims_vector_utils.h b/iOS_TNN_Demo/TNNDemo/tnn.framework/Headers/utils/dims_vector_utils.h index 2d84599..410619f 100644 --- a/iOS_TNN_Demo/TNNDemo/tnn.framework/Headers/utils/dims_vector_utils.h +++ b/iOS_TNN_Demo/TNNDemo/tnn.framework/Headers/utils/dims_vector_utils.h @@ -27,19 +27,19 @@ class PUBLIC DimsVectorUtils { public: // @brief all dims product, [start_index, end_index) // @param dims - static int Count(DimsVector dims, int start_index = 0, int end_index = -1); + static int Count(const DimsVector &dims, int start_index = 0, int end_index = -1); // @brief max of dims0 and dims1, [start_index, end_index) - static DimsVector Max(DimsVector dims0, DimsVector dims1, int start_index = 0, int end_index = -1); + static DimsVector Max(const DimsVector &dims0, const DimsVector &dims1, int start_index = 0, int end_index = -1); // @brief equal of dims0 and dims1, [start_index, end_index) - static bool Equal(DimsVector dims0, DimsVector dims1, int start_index = 0, int end_index = -1); + static bool Equal(const DimsVector &dims0, const DimsVector &dims1, int start_index = 0, int end_index = -1); // @brief NCHW dims vector to NHWC dims vector - static DimsVector NCHW2NHWC(DimsVector dims); + static DimsVector NCHW2NHWC(const DimsVector &dims); // @brief NHWC dims vector to NCHW - static DimsVector NHWC2NCHW(DimsVector dims); + static DimsVector NHWC2NCHW(const DimsVector &dims); }; } // namespace TNN_NS diff --git a/iOS_TNN_Demo/TNNDemo/tnn.framework/Headers/version.h b/iOS_TNN_Demo/TNNDemo/tnn.framework/Headers/version.h index 0081a79..50b4169 100644 --- a/iOS_TNN_Demo/TNNDemo/tnn.framework/Headers/version.h +++ b/iOS_TNN_Demo/TNNDemo/tnn.framework/Headers/version.h @@ -15,7 +15,7 @@ #define TNN_INCLUDE_TNN_VERSION_H_ static char *branch_name_tnn = "master"; -static char *commit_date_tnn = "2021-01-08"; -static char *commit_hash_tnn = "4974e3d"; +static char *commit_date_tnn = "2021-03-05"; +static char *commit_hash_tnn = "97fed1f"; #endif //TNN_INCLUDE_TNN_VERSION_H_ diff --git a/iOS_TNN_Demo/TNNDemo/tnn.framework/Info.plist b/iOS_TNN_Demo/TNNDemo/tnn.framework/Info.plist index 878931d..19378ce 100644 Binary files a/iOS_TNN_Demo/TNNDemo/tnn.framework/Info.plist and b/iOS_TNN_Demo/TNNDemo/tnn.framework/Info.plist differ diff --git a/iOS_TNN_Demo/TNNDemo/tnn.framework/tnn b/iOS_TNN_Demo/TNNDemo/tnn.framework/tnn index 07c10ff..3830e03 100644 Binary files a/iOS_TNN_Demo/TNNDemo/tnn.framework/tnn and b/iOS_TNN_Demo/TNNDemo/tnn.framework/tnn differ