Skip to content

Commit

Permalink
core:ppc Several improvements on VSX(1)
Browse files Browse the repository at this point in the history
 * remove unnecessary defines from vsx_utils
 * fix v_load_expand, load lower 64bit
 * use vec_ld, vec_st with alignment load/store on all types except 64bit
 * map v_extract to v_rotate_right
 * update license header
 * enable VSX by default on clang since opencv#11167
  • Loading branch information
seiko2plus committed Apr 11, 2018
1 parent 47134fa commit 56ec10b
Show file tree
Hide file tree
Showing 3 changed files with 50 additions and 197 deletions.
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -318,7 +318,7 @@ OCV_OPTION(ENABLE_PROFILING "Enable profiling in the GCC compiler (Add
OCV_OPTION(ENABLE_COVERAGE "Enable coverage collection with GCov" OFF IF CV_GCC )
OCV_OPTION(ENABLE_OMIT_FRAME_POINTER "Enable -fomit-frame-pointer for GCC" ON IF CV_GCC )
OCV_OPTION(ENABLE_POWERPC "Enable PowerPC for GCC" ON IF (CV_GCC AND CMAKE_SYSTEM_PROCESSOR MATCHES powerpc.*) )
OCV_OPTION(ENABLE_VSX "Enable POWER8 and above VSX (64-bit little-endian)" ON IF (CV_GCC AND PPC64LE) )
OCV_OPTION(ENABLE_VSX "Enable POWER8 and above VSX (64-bit little-endian)" ON IF ((CV_GCC OR CV_CLANG) AND PPC64LE) )
OCV_OPTION(ENABLE_FAST_MATH "Enable -ffast-math (not recommended for GCC 4.6.x)" OFF IF (CV_GCC AND (X86 OR X86_64)) )
OCV_OPTION(ENABLE_NEON "Enable NEON instructions" (NEON OR ANDROID_ARM_NEON OR AARCH64) IF (CV_GCC OR CV_CLANG) AND (ARM OR AARCH64 OR IOS) )
OCV_OPTION(ENABLE_VFPV3 "Enable VFPv3-D32 instructions" OFF IF (CV_GCC OR CV_CLANG) AND (ARM OR AARCH64 OR IOS) )
Expand Down
125 changes: 32 additions & 93 deletions modules/core/include/opencv2/core/hal/intrin_vsx.hpp
Original file line number Diff line number Diff line change
@@ -1,46 +1,6 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Copyright (C) 2015, Itseez Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html

#ifndef OPENCV_HAL_VSX_HPP
#define OPENCV_HAL_VSX_HPP
Expand Down Expand Up @@ -276,34 +236,38 @@ OPENCV_HAL_IMPL_VSX_INITVEC(v_int64x2, int64, s64, vec_dword2)
OPENCV_HAL_IMPL_VSX_INITVEC(v_float32x4, float, f32, vec_float4)
OPENCV_HAL_IMPL_VSX_INITVEC(v_float64x2, double, f64, vec_double2)

#define OPENCV_HAL_IMPL_VSX_LOADSTORE_INT_OP(_Tpvec, _Tp, ld_func, st_func) \
#define OPENCV_HAL_IMPL_VSX_LOADSTORE_C(_Tpvec, _Tp, ld, ld_a, st, st_a) \
inline _Tpvec v_load(const _Tp* ptr) \
{ return _Tpvec(ld_func(0, ptr)); } \
inline _Tpvec v_load_aligned(const _Tp* ptr) \
{ return _Tpvec(ld_func(0, ptr)); } \
{ return _Tpvec(ld(0, ptr)); } \
inline _Tpvec v_load_aligned(VSX_UNUSED(const _Tp* ptr)) \
{ return _Tpvec(ld_a(0, ptr)); } \
inline _Tpvec v_load_low(const _Tp* ptr) \
{ return _Tpvec(vec_ld_l8(ptr)); } \
inline _Tpvec v_load_halves(const _Tp* ptr0, const _Tp* ptr1) \
{ return _Tpvec(vec_mergesqh(vec_ld_l8(ptr0), vec_ld_l8(ptr1))); } \
inline void v_store(_Tp* ptr, const _Tpvec& a) \
{ st_func(a.val, 0, ptr); } \
inline void v_store_aligned(_Tp* ptr, const _Tpvec& a) \
{ st_func(a.val, 0, ptr); } \
{ st(a.val, 0, ptr); } \
inline void v_store_aligned(VSX_UNUSED(_Tp* ptr), const _Tpvec& a) \
{ st_a(a.val, 0, ptr); } \
inline void v_store_low(_Tp* ptr, const _Tpvec& a) \
{ vec_st_l8(a.val, ptr); } \
inline void v_store_high(_Tp* ptr, const _Tpvec& a) \
{ vec_st_h8(a.val, ptr); }

OPENCV_HAL_IMPL_VSX_LOADSTORE_INT_OP(v_uint8x16, uchar, vsx_ld, vsx_st)
OPENCV_HAL_IMPL_VSX_LOADSTORE_INT_OP(v_int8x16, schar, vsx_ld, vsx_st)
OPENCV_HAL_IMPL_VSX_LOADSTORE_INT_OP(v_uint16x8, ushort, vsx_ld, vsx_st)
OPENCV_HAL_IMPL_VSX_LOADSTORE_INT_OP(v_int16x8, short, vsx_ld, vsx_st)
OPENCV_HAL_IMPL_VSX_LOADSTORE_INT_OP(v_uint32x4, uint, vsx_ld, vsx_st)
OPENCV_HAL_IMPL_VSX_LOADSTORE_INT_OP(v_int32x4, int, vsx_ld, vsx_st)
OPENCV_HAL_IMPL_VSX_LOADSTORE_INT_OP(v_float32x4, float, vsx_ld, vsx_st)
OPENCV_HAL_IMPL_VSX_LOADSTORE_INT_OP(v_float64x2, double, vsx_ld, vsx_st)
OPENCV_HAL_IMPL_VSX_LOADSTORE_INT_OP(v_uint64x2, uint64, vsx_ld2, vsx_st2)
OPENCV_HAL_IMPL_VSX_LOADSTORE_INT_OP(v_int64x2, int64, vsx_ld2, vsx_st2)
#define OPENCV_HAL_IMPL_VSX_LOADSTORE(_Tpvec, _Tp) \
OPENCV_HAL_IMPL_VSX_LOADSTORE_C(_Tpvec, _Tp, vsx_ld, vec_ld, vsx_st, vec_st)

OPENCV_HAL_IMPL_VSX_LOADSTORE(v_uint8x16, uchar)
OPENCV_HAL_IMPL_VSX_LOADSTORE(v_int8x16, schar)
OPENCV_HAL_IMPL_VSX_LOADSTORE(v_uint16x8, ushort)
OPENCV_HAL_IMPL_VSX_LOADSTORE(v_int16x8, short)
OPENCV_HAL_IMPL_VSX_LOADSTORE(v_uint32x4, uint)
OPENCV_HAL_IMPL_VSX_LOADSTORE(v_int32x4, int)
OPENCV_HAL_IMPL_VSX_LOADSTORE(v_float32x4, float)

OPENCV_HAL_IMPL_VSX_LOADSTORE_C(v_float64x2, double, vsx_ld, vsx_ld, vsx_st, vsx_st)
OPENCV_HAL_IMPL_VSX_LOADSTORE_C(v_uint64x2, uint64, vsx_ld2, vsx_ld2, vsx_st2, vsx_st2)
OPENCV_HAL_IMPL_VSX_LOADSTORE_C(v_int64x2, int64, vsx_ld2, vsx_ld2, vsx_st2, vsx_st2)

//////////////// Value reordering ///////////////

Expand Down Expand Up @@ -343,7 +307,7 @@ inline void v_expand(const _Tpvec& a, _Tpwvec& b0, _Tpwvec& b1) \
b1.val = fl(a.val); \
} \
inline _Tpwvec v_load_expand(const _Tp* ptr) \
{ return _Tpwvec(fh(vsx_ld(0, ptr))); }
{ return _Tpwvec(fh(vec_ld_l8(ptr))); }

OPENCV_HAL_IMPL_VSX_EXPAND(v_uint8x16, v_uint16x8, uchar, vec_unpacklu, vec_unpackhu)
OPENCV_HAL_IMPL_VSX_EXPAND(v_int8x16, v_int16x8, schar, vec_unpackl, vec_unpackh)
Expand All @@ -353,10 +317,10 @@ OPENCV_HAL_IMPL_VSX_EXPAND(v_uint32x4, v_uint64x2, uint, vec_unpacklu, vec_unpac
OPENCV_HAL_IMPL_VSX_EXPAND(v_int32x4, v_int64x2, int, vec_unpackl, vec_unpackh)

inline v_uint32x4 v_load_expand_q(const uchar* ptr)
{ return v_uint32x4(vec_ld_buw(ptr)); }
{ return v_uint32x4(vec_uint4_set(ptr[0], ptr[1], ptr[2], ptr[3])); }

inline v_int32x4 v_load_expand_q(const schar* ptr)
{ return v_int32x4(vec_ld_bsw(ptr)); }
{ return v_int32x4(vec_int4_set(ptr[0], ptr[1], ptr[2], ptr[3])); }

/* pack */
#define OPENCV_HAL_IMPL_VSX_PACK(_Tpvec, _Tp, _Tpwvec, _Tpvn, _Tpdel, sfnc, pkfnc, addfnc, pack) \
Expand Down Expand Up @@ -429,36 +393,6 @@ inline void v_recombine(const _Tpvec& a, const _Tpvec& b, _Tpvec& c, _Tpvec& d)
d.val = vec_mergesql(a.val, b.val);
}

/* Extract */
template<int s, typename _Tpvec>
inline _Tpvec v_extract(const _Tpvec& a, const _Tpvec& b)
{
const int w = sizeof(typename _Tpvec::lane_type);
const int n = _Tpvec::nlanes;
const unsigned int sf = ((w * n) - (s * w));
if (s == 0)
return _Tpvec(a.val);
else if (sf > 15)
return _Tpvec();
// bitwise it just to make xlc happy
return _Tpvec(vec_sld(b.val, a.val, sf & 15));
}

#define OPENCV_HAL_IMPL_VSX_EXTRACT_2(_Tpvec) \
template<int s> \
inline _Tpvec v_extract(const _Tpvec& a, const _Tpvec& b) \
{ \
switch(s) { \
case 0: return _Tpvec(a.val); \
case 2: return _Tpvec(b.val); \
case 1: return _Tpvec(vec_sldw(b.val, a.val, 2)); \
default: return _Tpvec(); \
} \
}
OPENCV_HAL_IMPL_VSX_EXTRACT_2(v_uint64x2)
OPENCV_HAL_IMPL_VSX_EXTRACT_2(v_int64x2)


////////// Arithmetic, bitwise and comparison operations /////////

/* Element-wise binary and unary operations */
Expand Down Expand Up @@ -669,6 +603,11 @@ OPENCV_IMPL_VSX_ROTATE_64(v_uint64x2, right, a, b)
OPENCV_IMPL_VSX_ROTATE_64(v_int64x2, left, b, a)
OPENCV_IMPL_VSX_ROTATE_64(v_uint64x2, left, b, a)

/* Extract */
template<int s, typename _Tpvec>
inline _Tpvec v_extract(const _Tpvec& a, const _Tpvec& b)
{ return v_rotate_right<s>(a, b); }

////////// Reduce and mask /////////

/** Reduce **/
Expand Down
Loading

0 comments on commit 56ec10b

Please sign in to comment.